diff --git a/.asf.yaml b/.asf.yaml deleted file mode 100644 index fca520f0d9..0000000000 --- a/.asf.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -github: - description: "Apache Hive" - homepage: https://hive.apache.org/ - labels: - - hive - - java - - database - - sql - - apache - - big-data - - hadoop - features: - wiki: false - issues: false - projects: false - enabled_merge_buttons: - squash: true - merge: true - rebase: true -notifications: - commits: commits@hive.apache.org - issues: gitbox@hive.apache.org - pullrequests: gitbox@hive.apache.org - jira_options: link label worklog diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 829791e0a9..b3faf050f1 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2465,19 +2465,6 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "If the number of references to a CTE clause exceeds this threshold, Hive will materialize it\n" + "before executing the main query block. 
-1 will disable this feature."), - HIVE_OPTIMIZE_BI_ENABLED("hive.optimize.bi.enabled", false, - "Enables query rewrites based on approximate functions(sketches)."), - - HIVE_OPTIMIZE_BI_REWRITE_COUNTDISTINCT_ENABLED("hive.optimize.bi.rewrite.countdistinct.enabled", - true, - "Enables to rewrite COUNT(DISTINCT(X)) queries to be rewritten to use sketch functions."), - - HIVE_OPTIMIZE_BI_REWRITE_COUNT_DISTINCT_SKETCH( - "hive.optimize.bi.rewrite.countdistinct.sketch", "hll", - new StringSet("hll"), - "Defines which sketch type to use when rewriting COUNT(DISTINCT(X)) expressions. " - + "Distinct counting can be done with: hll"), - // Statistics HIVE_STATS_ESTIMATE_STATS("hive.stats.estimate", true, "Estimate statistics in absence of statistics."), diff --git a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index d732004e51..8e643fe844 100644 --- a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -176,8 +176,8 @@ LOAD_INTO_NON_NATIVE(10101, "A non-native table cannot be used as target for LOAD"), LOCKMGR_NOT_SPECIFIED(10102, "Lock manager not specified correctly, set hive.lock.manager"), LOCKMGR_NOT_INITIALIZED(10103, "Lock manager could not be initialized, check hive.lock.manager "), - LOCK_CANNOT_BE_ACQUIRED(10104, "Locks on the underlying objects cannot be acquired, " - + "retry after some time."), + LOCK_CANNOT_BE_ACQUIRED(10104, "Locks on the underlying objects cannot be acquired. " + + "retry after some time"), ZOOKEEPER_CLIENT_COULD_NOT_BE_INITIALIZED(10105, "Check hive.zookeeper.quorum " + "and hive.zookeeper.client.port"), OVERWRITE_ARCHIVED_PART(10106, "Cannot overwrite an archived partition. 
" + diff --git a/common/src/test/org/apache/hadoop/hive/common/metrics/metrics2/TestCodahaleMetrics.java b/common/src/test/org/apache/hadoop/hive/common/metrics/metrics2/TestCodahaleMetrics.java index 9c4e4753fb..1c49d9575f 100644 --- a/common/src/test/org/apache/hadoop/hive/common/metrics/metrics2/TestCodahaleMetrics.java +++ b/common/src/test/org/apache/hadoop/hive/common/metrics/metrics2/TestCodahaleMetrics.java @@ -154,12 +154,7 @@ public Void call() throws Exception { public void testFileReporting() throws Exception { int runs = 5; String counterName = "count2"; - - // on the first write the metrics writer should initialize stuff - MetricsFactory.getInstance().incrementCounter(counterName); - sleep(5 * REPORT_INTERVAL_MS); - - for (int i = 1; i <= runs; i++) { + for (int i = 0; i < runs; i++) { MetricsFactory.getInstance().incrementCounter(counterName); sleep(REPORT_INTERVAL_MS + REPORT_INTERVAL_MS / 2); Assert.assertEquals(i + 1, getCounterValue(counterName)); diff --git a/data/conf/llap/hive-site.xml b/data/conf/llap/hive-site.xml index 6b28fef3d7..d37c1b5dab 100644 --- a/data/conf/llap/hive-site.xml +++ b/data/conf/llap/hive-site.xml @@ -378,10 +378,4 @@ hive_admin_user - - hive.conf.restricted.list - hive.query.max.length - Using property defined in HiveConf.ConfVars to test System property overriding - - diff --git a/data/conf/llap/hivemetastore-site.xml b/data/conf/llap/hivemetastore-site.xml deleted file mode 100644 index f033caab38..0000000000 --- a/data/conf/llap/hivemetastore-site.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java index 3a8cb39113..ac8922b832 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java +++ 
b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java @@ -302,9 +302,9 @@ public String next() { try { FileStatus file = files[i]; i++; - return ReplChangeManager.encodeFileUri(file.getPath().toString(), + return ReplChangeManager.getInstance(conf).encodeFileUri(file.getPath().toString(), ReplChangeManager.checksumFor(file.getPath(), fs), null); - } catch (IOException e) { + } catch (IOException | MetaException e) { throw new RuntimeException(e); } } @@ -521,9 +521,10 @@ public boolean hasNext() { public String next() { String result; try { - result = ReplChangeManager.encodeFileUri(files.get(i), (chksums != null && !chksums.isEmpty()) ? chksums.get(i) : null, + result = ReplChangeManager.getInstance(conf). + encodeFileUri(files.get(i), (chksums != null && !chksums.isEmpty()) ? chksums.get(i) : null, subDirs != null ? subDirs.get(i) : null); - } catch (IOException e) { + } catch (IOException | MetaException e) { // File operations failed LOG.error("Encoding file URI failed with error " + e.getMessage()); throw new RuntimeException(e.getMessage()); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java index 41a1ce9e1d..8f1bc40407 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java @@ -1302,29 +1302,29 @@ public void testClearerEncrypted() throws Exception { ReplChangeManager.getInstance(hiveConfCmClearer).recycle(dirTbl3, RecycleType.MOVE, true); assertTrue(fsWarehouse.exists(ReplChangeManager.getCMPath(hiveConfCmClearer, part11.getName(), fileChksum11, - ReplChangeManager.getCmRoot(part11).toString()))); + 
ReplChangeManager.getInstance(conf).getCmRoot(part11).toString()))); assertTrue(fsWarehouse.exists(ReplChangeManager.getCMPath(hiveConfCmClearer, part12.getName(), fileChksum12, - ReplChangeManager.getCmRoot(part12).toString()))); + ReplChangeManager.getInstance(conf).getCmRoot(part12).toString()))); assertTrue(fsWarehouse.exists(ReplChangeManager.getCMPath(hiveConfCmClearer, part21.getName(), fileChksum21, - ReplChangeManager.getCmRoot(part21).toString()))); + ReplChangeManager.getInstance(conf).getCmRoot(part21).toString()))); assertTrue(fsWarehouse.exists(ReplChangeManager.getCMPath(hiveConfCmClearer, part22.getName(), fileChksum22, - ReplChangeManager.getCmRoot(part22).toString()))); + ReplChangeManager.getInstance(conf).getCmRoot(part22).toString()))); assertTrue(fsWarehouse.exists(ReplChangeManager.getCMPath(hiveConfCmClearer, part31.getName(), fileChksum31, - ReplChangeManager.getCmRoot(part31).toString()))); + ReplChangeManager.getInstance(conf).getCmRoot(part31).toString()))); assertTrue(fsWarehouse.exists(ReplChangeManager.getCMPath(hiveConfCmClearer, part32.getName(), fileChksum32, - ReplChangeManager.getCmRoot(part32).toString()))); + ReplChangeManager.getInstance(conf).getCmRoot(part32).toString()))); fsWarehouse.setTimes(ReplChangeManager.getCMPath(hiveConfCmClearer, part11.getName(), fileChksum11, - ReplChangeManager.getCmRoot(part11).toString()), + ReplChangeManager.getInstance(conf).getCmRoot(part11).toString()), now - 86400*1000*2, now - 86400*1000*2); fsWarehouse.setTimes(ReplChangeManager.getCMPath(hiveConfCmClearer, part21.getName(), fileChksum21, - ReplChangeManager.getCmRoot(part21).toString()), + ReplChangeManager.getInstance(conf).getCmRoot(part21).toString()), now - 86400*1000*2, now - 86400*1000*2); fsWarehouse.setTimes(ReplChangeManager.getCMPath(hiveConfCmClearer, part31.getName(), fileChksum31, - ReplChangeManager.getCmRoot(part31).toString()), + ReplChangeManager.getInstance(conf).getCmRoot(part31).toString()), now - 86400*1000*2, now 
- 86400*1000*2); fsWarehouse.setTimes(ReplChangeManager.getCMPath(hiveConfCmClearer, part32.getName(), fileChksum32, - ReplChangeManager.getCmRoot(part32).toString()), + ReplChangeManager.getInstance(conf).getCmRoot(part32).toString()), now - 86400*1000*2, now - 86400*1000*2); ReplChangeManager.scheduleCMClearer(hiveConfCmClearer); @@ -1339,17 +1339,17 @@ public void testClearerEncrypted() throws Exception { Assert.fail("timeout, cmroot has not been cleared"); } if (!fsWarehouse.exists(ReplChangeManager.getCMPath(hiveConfCmClearer, part11.getName(), fileChksum11, - ReplChangeManager.getCmRoot(part11).toString())) && + ReplChangeManager.getInstance(conf).getCmRoot(part11).toString())) && fsWarehouse.exists(ReplChangeManager.getCMPath(hiveConfCmClearer, part12.getName(), fileChksum12, - ReplChangeManager.getCmRoot(part12).toString())) && + ReplChangeManager.getInstance(conf).getCmRoot(part12).toString())) && !fsWarehouse.exists(ReplChangeManager.getCMPath(hiveConfCmClearer, part21.getName(), fileChksum21, - ReplChangeManager.getCmRoot(part21).toString())) && + ReplChangeManager.getInstance(conf).getCmRoot(part21).toString())) && fsWarehouse.exists(ReplChangeManager.getCMPath(hiveConfCmClearer, part22.getName(), fileChksum22, - ReplChangeManager.getCmRoot(part22).toString())) && + ReplChangeManager.getInstance(conf).getCmRoot(part22).toString())) && !fsWarehouse.exists(ReplChangeManager.getCMPath(hiveConfCmClearer, part31.getName(), fileChksum31, - ReplChangeManager.getCmRoot(part31).toString())) && + ReplChangeManager.getInstance(conf).getCmRoot(part31).toString())) && !fsWarehouse.exists(ReplChangeManager.getCMPath(hiveConfCmClearer, part32.getName(), fileChksum32, - ReplChangeManager.getCmRoot(part32).toString()))) { + ReplChangeManager.getInstance(conf).getCmRoot(part32).toString()))) { cleared = true; } } while (!cleared); diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractTestJdbcGenericUDTFGetSplits.java 
b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractTestJdbcGenericUDTFGetSplits.java index 21184bfb40..8cbca69737 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractTestJdbcGenericUDTFGetSplits.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractTestJdbcGenericUDTFGetSplits.java @@ -42,7 +42,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; /** * AbstractTestJdbcGenericUDTFGetSplits. @@ -51,7 +50,6 @@ protected static MiniHS2 miniHS2 = null; protected static String dataFileDir; protected static String tableName = "testtab1"; - protected static String partitionedTableName = "partitionedtesttab1"; protected static HiveConf conf = null; static Path kvDataFilePath; protected Connection hs2Conn = null; @@ -166,78 +164,14 @@ protected void testGenericUDTFOrderBySplitCount1(String udtfName, int[] expected query = "select " + udtfName + "(" + "'select value from " + tableName + " order by under_col limit 0', 5)"; runQuery(query, getConfigs(), expectedCounts[2]); - query = "select " + udtfName + "(" + "'select value from " + tableName + " limit 2', 5)"; - runQuery(query, getConfigs(), expectedCounts[3]); - - query = "select " + udtfName + "(" + "'select value from " + tableName + " group by value limit 2', 5)"; - runQuery(query, getConfigs(), expectedCounts[4]); - - query = "select " + udtfName + "(" + "'select value from " + tableName + " where value is not null limit 2', 5)"; - runQuery(query, getConfigs(), expectedCounts[5]); - query = "select " + udtfName + "(" + "'select `value` from (select value from " + tableName + " where value is not null order by value) as t', 5)"; - runQuery(query, getConfigs(), expectedCounts[6]); + runQuery(query, getConfigs(), expectedCounts[3]); List setCmds = getConfigs(); setCmds.add("set hive.llap.external.splits.order.by.force.single.split=false"); query = "select " + udtfName + "(" + "'select `value` 
from (select value from " + tableName + " where value is not null order by value) as t', 5)"; - runQuery(query, setCmds, expectedCounts[7]); - } - - protected void testGenericUDTFOrderBySplitCount1OnPartitionedTable(String udtfName, int[] expectedCounts) - throws Exception { - createPartitionedTestTable(null, partitionedTableName); - - String query = "select " + udtfName + "(" + "'select id from " + partitionedTableName + "', 5)"; - runQuery(query, getConfigs(), expectedCounts[0]); - - query = "select " + udtfName + "(" + "'select id from " + partitionedTableName + " order by id', 5)"; - runQuery(query, getConfigs(), expectedCounts[1]); - - query = "select " + udtfName + "(" + "'select id from " + partitionedTableName + " limit 2', 5)"; - runQuery(query, getConfigs(), expectedCounts[1]); - - query = "select " + udtfName + "(" + "'select id from " + partitionedTableName + " where id != 0 limit 2', 5)"; - runQuery(query, getConfigs(), expectedCounts[1]); - - query = "select " + udtfName + "(" + "'select id from " + partitionedTableName + " group by id limit 2', 5)"; - runQuery(query, getConfigs(), expectedCounts[1]); - - } - - private void createPartitionedTestTable(String database, String tableName) throws Exception { - Statement stmt = hs2Conn.createStatement(); - - if (database != null) { - stmt.execute("CREATE DATABASE IF NOT EXISTS " + database); - stmt.execute("USE " + database); - } - - // create table - stmt.execute("DROP TABLE IF EXISTS " + tableName); - stmt.execute("CREATE TABLE " + tableName - + " (id INT) partitioned by (p1 int)"); - - // load data - for (int i=1; i<=5; i++) { - String values = ""; - for (int j=1; j<=10; j++) { - if (j != 10) { - values+= "(" + j +"),"; - } else { - values+= "(" + j +")"; - } - } - stmt.execute("insert into " + tableName + " partition (p1=" + i +") " + " values " + values); - } - - - ResultSet res = stmt.executeQuery("SELECT count(*) FROM " + tableName); - assertTrue(res.next()); - assertEquals(50, res.getInt(1)); - 
res.close(); - stmt.close(); + runQuery(query, setCmds, expectedCounts[4]); } } diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java index fccf5ed4dd..defbe78802 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java @@ -38,15 +38,9 @@ @Test(timeout = 200000) public void testGenericUDTFOrderBySplitCount1() throws Exception { - super.testGenericUDTFOrderBySplitCount1("get_splits", new int[]{10, 1, 0, 2, 2, 2, 1, 10}); + super.testGenericUDTFOrderBySplitCount1("get_splits", new int[]{10, 1, 0, 1, 10}); } - @Test(timeout = 200000) - public void testGenericUDTFOrderBySplitCount1OnPartitionedTable() throws Exception { - super.testGenericUDTFOrderBySplitCount1OnPartitionedTable("get_splits", new int[]{10, 1, 2, 2, 2}); - } - - @Test public void testDecimalPrecisionAndScale() throws Exception { try (Statement stmt = hs2Conn.createStatement()) { diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits2.java index d296d56002..330174513c 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits2.java @@ -25,12 +25,7 @@ @Test(timeout = 200000) public void testGenericUDTFOrderBySplitCount1() throws Exception { - super.testGenericUDTFOrderBySplitCount1("get_llap_splits", new int[]{12, 3, 1, 4, 4, 4, 3, 12}); - } - - @Test(timeout = 200000) - public void testGenericUDTFOrderBySplitCount1OnPartitionedTable() throws Exception { - super.testGenericUDTFOrderBySplitCount1OnPartitionedTable("get_llap_splits", new int[]{12, 3, 4, 4, 4}); + 
super.testGenericUDTFOrderBySplitCount1("get_llap_splits", new int[]{12, 3, 1, 3, 12}); } } diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapArrow.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapArrow.java index 85a7ab06f9..bc2480a422 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapArrow.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapArrow.java @@ -39,7 +39,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.junit.AfterClass; -import org.junit.Ignore; import org.junit.Test; import java.sql.SQLException; import java.sql.Statement; @@ -510,12 +509,6 @@ public void testConcurrentAddAndCloseAndCloseAllConnections() throws Exception { } - @Override - @Ignore - public void testMultipleBatchesOfComplexTypes() { - // ToDo: FixMe - } - private void executeNTimes(Callable action, int noOfTimes, long intervalMillis, ExceptionHolder exceptionHolder) { for (int i = 0; i < noOfTimes; i++) { try { diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapRow.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapRow.java index 7fd1992caf..d954d0e2fe 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapRow.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapRow.java @@ -27,7 +27,6 @@ import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.junit.Ignore; /** * TestJdbcWithMiniLlap for llap Row format. 
@@ -46,11 +45,5 @@ public static void beforeTest() throws Exception { return new LlapRowInputFormat(); } - @Override - @Ignore - public void testMultipleBatchesOfComplexTypes() { - // ToDo: FixMe - } - } diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapVectorArrow.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapVectorArrow.java index 683ba484b1..35eda6cb0a 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapVectorArrow.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapVectorArrow.java @@ -40,7 +40,6 @@ import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.hive.llap.LlapArrowRowInputFormat; -import org.junit.Ignore; import org.junit.Test; /** @@ -241,8 +240,6 @@ public void testDataTypes() throws Exception { } - // ToDo: Fix me - @Ignore @Test public void testTypesNestedInListWithLimitAndFilters() throws Exception { try (Statement statement = hs2Conn.createStatement()) { @@ -337,8 +334,6 @@ public void testTypesNestedInListWithLimitAndFilters() throws Exception { } - // ToDo: Fix me - @Ignore @Test public void testTypesNestedInMapWithLimitAndFilters() throws Exception { try (Statement statement = hs2Conn.createStatement()) { @@ -412,22 +407,5 @@ private void verifyResult(List actual, Object[]... 
expected) { } } - @Override - @Ignore - public void testMultipleBatchesOfComplexTypes() { - // ToDo: FixMe - } - - @Override - @Ignore - public void testComplexQuery() { - // ToDo: FixMe - } - - @Override - @Ignore - public void testLlapInputFormatEndToEnd() { - // ToDo: FixMe - } } diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapVectorArrowBatch.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapVectorArrowBatch.java index 6b5c5df58f..bba599b883 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapVectorArrowBatch.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapVectorArrowBatch.java @@ -38,7 +38,6 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; import java.sql.SQLException; @@ -473,17 +472,5 @@ private void executeSQL(String query, String... 
moreQueries) throws SQLException // to be implemented for this reader } - @Override - @Ignore - public void testMultipleBatchesOfComplexTypes() { - // ToDo: FixMe - } - - @Override - @Ignore - public void testLlapInputFormatEndToEndWithMultipleBatches() { - // ToDo: FixMe - } - } diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestNewGetSplitsFormat.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestNewGetSplitsFormat.java index 5aac2a588e..ab1798d571 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestNewGetSplitsFormat.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestNewGetSplitsFormat.java @@ -29,7 +29,6 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.junit.BeforeClass; -import org.junit.Ignore; import java.util.ArrayList; import java.util.List; @@ -57,12 +56,6 @@ testJdbcWithMiniLlapVectorArrow.testDataTypes(); } - @Override - @Ignore - public void testMultipleBatchesOfComplexTypes() { - // ToDo: FixMe - } - @Override protected int processQuery(String currentDatabase, String query, int numSplits, RowProcessor rowProcessor) throws Exception { String url = miniHS2.getJdbcURL(); diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestNewGetSplitsFormatReturnPath.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestNewGetSplitsFormatReturnPath.java index 3edfabfbad..a437998490 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestNewGetSplitsFormatReturnPath.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestNewGetSplitsFormatReturnPath.java @@ -20,7 +20,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.junit.BeforeClass; -import org.junit.Ignore; /** * TestNewGetSplitsFormatReturnPath. 
@@ -34,10 +33,4 @@ conf.setBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP, true); BaseJdbcWithMiniLlap.beforeTest(conf); } - - @Override - @Ignore - public void testMultipleBatchesOfComplexTypes() { - // ToDo: FixMe - } } diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 5468728f83..c55f8db61a 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -695,7 +695,6 @@ minillaplocal.query.files=\ parquet_legacy_mixed_date.q,\ parquet_legacy_mixed_timestamp.q,\ parquet_proleptic_mixed_date.q,\ - parquet_timestampt_to_bigint.q,\ partition_ctas.q,\ partition_multilevels.q,\ partition_shared_scan.q,\ @@ -821,9 +820,7 @@ minillaplocal.query.files=\ schq_ingest.q,\ sketches_hll.q,\ sketches_theta.q,\ - sketches_rewrite.q,\ sketches_materialized_view_rollup.q,\ - sketches_materialized_view_rollup2.q,\ table_access_keys_stats.q,\ temp_table_llap_partitioned.q,\ tez_bmj_schema_evolution.q,\ @@ -929,7 +926,6 @@ minillaplocal.query.files=\ vector_null_map.q,\ vector_number_compare_projection.q,\ vector_orc_merge_incompat_schema.q,\ - vector_offset_limit_reduce.q,\ vector_orc_nested_column_pruning.q,\ vector_orc_null_check.q,\ vector_order_null.q,\ @@ -1975,263 +1971,7 @@ minillaplocal.query.files=\ windowing_rank.q,\ windowing_udaf2.q,\ windowing_windowspec4.q,\ - zero_rows_single_insert.q,\ - acid_mapjoin.q,\ - acid_nullscan.q,\ - acid_stats2.q,\ - acid_stats3.q,\ - acid_stats4.q,\ - acid_stats5.q,\ - acid_table_stats.q,\ - acid_view_delete.q,\ - alias_casted_column.q,\ - allcolref_in_udf.q,\ - alterColumnStatsPart.q,\ - alter_change_db_location.q,\ - alter_db_owner.q,\ - alter_partition_coltype.q,\ - ambiguitycheck.q,\ - ambiguous_col.q,\ - analyze_table_null_partition.q,\ - analyze_tbl_date.q,\ - annotate_stats_deep_filters.q,\ - annotate_stats_filter.q,\ - annotate_stats_groupby.q,\ - annotate_stats_groupby2.q,\ - 
annotate_stats_join.q,\ - annotate_stats_join_pkfk.q,\ - annotate_stats_limit.q,\ - annotate_stats_part.q,\ - annotate_stats_select.q,\ - annotate_stats_table.q,\ - annotate_stats_udtf.q,\ - annotate_stats_union.q,\ - ansi_sql_arithmetic.q,\ - array_map_access_nonconstant.q,\ - array_size_estimation.q,\ - authorization_9.q,\ - authorization_explain.q,\ - authorization_owner_actions_db.q,\ - authorization_view_1.q,\ - authorization_view_disable_cbo_1.q,\ - autoColumnStats_11.q,\ - autoColumnStats_4.q,\ - autoColumnStats_5.q,\ - autoColumnStats_5a.q,\ - autoColumnStats_7.q,\ - autoColumnStats_8.q,\ - autoColumnStats_9.q,\ - auto_join10.q,\ - auto_join11.q,\ - auto_join12.q,\ - auto_join13.q,\ - auto_join14.q,\ - auto_join15.q,\ - auto_join16.q,\ - auto_join17.q,\ - auto_join18.q,\ - auto_join18_multi_distinct.q,\ - auto_join19.q,\ - auto_join19_inclause.q,\ - auto_join2.q,\ - auto_join20.q,\ - auto_join22.q,\ - auto_join23.q,\ - auto_join24.q,\ - auto_join25.q,\ - auto_join26.q,\ - auto_join27.q,\ - auto_join28.q,\ - auto_join3.q,\ - auto_join31.q,\ - auto_join32.q,\ - auto_join33.q,\ - auto_join4.q,\ - auto_join5.q,\ - auto_join6.q,\ - auto_join7.q,\ - auto_join8.q,\ - auto_join9.q,\ - auto_join_reordering_values.q,\ - auto_join_stats.q,\ - auto_join_stats2.q,\ - auto_join_without_localtask.q,\ - avro_date.q,\ - avro_schema_evolution_native.q,\ - avro_timestamp.q,\ - avrotblsjoin.q,\ - binarysortable_1.q,\ - bool_unknown.q,\ - bucket1.q,\ - bucketsortoptimize_insert_1.q,\ - bucketsortoptimize_insert_3.q,\ - bucketsortoptimize_insert_4.q,\ - bucketsortoptimize_insert_5.q,\ - bucketsortoptimize_insert_8.q,\ - case_sensitivity.q,\ - cast1.q,\ - cast_datetime_with_sql_2016_format.q,\ - cast_on_constant.q,\ - cbo_SortUnionTransposeRule.q,\ - cbo_const.q,\ - cbo_input26.q,\ - cbo_ppd_non_deterministic.q,\ - cbo_rp_annotate_stats_groupby.q,\ - cbo_rp_auto_join0.q,\ - cbo_rp_auto_join17.q,\ - cbo_rp_cross_product_check_2.q,\ - cbo_rp_gby2_map_multi_distinct.q,\ - 
cbo_rp_groupby3_noskew_multi_distinct.q,\ - cbo_rp_join0.q,\ - cbo_rp_join1.q,\ - cbo_rp_outer_join_ppr.q,\ - cbo_rp_simple_select.q,\ - cbo_rp_udaf_percentile_approx_23.q,\ - cbo_stats_estimation.q,\ - cbo_union_view.q,\ - char_serde.q,\ - char_udf1.q,\ - clusterctas.q,\ - columnStatsUpdateForStatsOptimizer_2.q,\ - column_pruner_multiple_children.q,\ - column_pruning_partitioned_view.q,\ - columnarserde_create_shortcut.q,\ - columnstats_partlvl.q,\ - columnstats_partlvl_dp.q,\ - columnstats_quoting.q,\ - columnstats_tbllvl.q,\ - combine2.q,\ - comments.q,\ - complex_alias.q,\ - compute_stats_date.q,\ - concat_op.q,\ - constGby.q,\ - constantPropWhen.q,\ - constantPropagateForSubQuery.q,\ - constant_prop.q,\ - constant_prop_1.q,\ - constant_prop_2.q,\ - constant_prop_3.q,\ - constant_prop_timestamp_date_cast.q,\ - constantfolding.q,\ - constprog1.q,\ - constprog2.q,\ - constprog3.q,\ - constprog_dp.q,\ - constprog_partitioner.q,\ - constprog_type.q,\ - constprog_when_case.q,\ - correlated_join_keys.q,\ - correlationoptimizer10.q,\ - correlationoptimizer11.q,\ - correlationoptimizer12.q,\ - correlationoptimizer13.q,\ - correlationoptimizer14.q,\ - correlationoptimizer15.q,\ - correlationoptimizer5.q,\ - correlationoptimizer7.q,\ - correlationoptimizer8.q,\ - correlationoptimizer9.q,\ - cp_sel.q,\ - create_view_disable_cbo.q,\ - cross_join_merge.q,\ - ctas_colname.q,\ - ctas_uses_database_location.q,\ - cte_6.q,\ - cte_join.q,\ - database_location.q,\ - database_properties.q,\ - date_2.q,\ - date_serde.q,\ - db_ddl_explain.q,\ - decimal_join2.q,\ - decimal_precision.q,\ - decimal_precision2.q,\ - decimal_stats.q,\ - decimal_udf.q,\ - decimal_udf2.q,\ - describe_database.q,\ - display_colstats_tbllvl.q,\ - distinct_groupby.q,\ - distinct_stats.q,\ - distinct_windowing.q,\ - distinct_windowing_no_cbo.q,\ - dynamic_partition_insert.q,\ - dynamic_partition_skip_default.q,\ - dynpart_merge.q,\ - dynpart_sort_optimization_acid2.q,\ - escape_clusterby1.q,\ - 
escape_distributeby1.q,\ - escape_orderby1.q,\ - escape_sortby1.q,\ - except_all.q,\ - exec_parallel_column_stats.q,\ - explain_ddl.q,\ - explain_logical.q,\ - explaindenpendencydiffengs.q,\ - extract.q,\ - extrapolate_part_stats_date.q,\ - extrapolate_part_stats_full.q,\ - extrapolate_part_stats_partial.q,\ - fetch_aggregation.q,\ - filter_aggr.q,\ - filter_cond_pushdown.q,\ - filter_cond_pushdown2.q,\ - filter_cond_pushdown_HIVE_15647.q,\ - filter_in_or_dup.q,\ - filter_numeric.q,\ - flatten_and_or.q,\ - floor_time.q,\ - fm-sketch.q,\ - fold_case.q,\ - fold_eq_with_case_when.q,\ - fold_to_null.q,\ - fold_when.q,\ - folder_predicate.q,\ - foldts.q,\ - fp_literal_arithmetic.q,\ - gby_star.q,\ - groupby10.q,\ - groupby11.q,\ - groupby12.q,\ - groupby13.q,\ - groupby1_limit.q,\ - groupby1_map.q,\ - groupby1_map_nomap.q,\ - groupby1_map_skew.q,\ - groupby1_noskew.q,\ - groupby2_limit.q,\ - groupby2_map.q,\ - groupby2_map_multi_distinct.q,\ - groupby2_map_skew.q,\ - groupby2_noskew.q,\ - groupby2_noskew_multi_distinct.q,\ - groupby3_map.q,\ - groupby3_map_multi_distinct.q,\ - groupby3_map_skew.q,\ - groupby3_noskew.q,\ - groupby3_noskew_multi_distinct.q,\ - groupby4.q,\ - groupby4_map.q,\ - groupby4_map_skew.q,\ - groupby4_noskew.q,\ - groupby5.q,\ - groupby5_map.q,\ - groupby5_map_skew.q,\ - groupby5_noskew.q,\ - groupby6.q,\ - groupby6_map.q,\ - groupby6_map_skew.q,\ - groupby6_noskew.q,\ - groupby7_map.q,\ - groupby7_map_multi_single_reducer.q,\ - groupby7_map_skew.q,\ - groupby7_noskew.q,\ - groupby7_noskew_multi_single_reducer.q,\ - groupby8.q,\ - groupby8_map.q,\ - groupby8_map_skew.q,\ - groupby8_noskew.q + zero_rows_single_insert.q encrypted.query.files=encryption_join_unencrypted_tbl.q,\ encryption_insert_partition_static.q,\ diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index 8d3856b742..034d7f89ee 100644 --- 
a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -204,9 +204,6 @@ public QTestUtil(QTestArguments testArgs) throws Exception { System.out.println("Setting hive-site: " + HiveConf.getHiveSiteLocation()); } - // For testing configurations set by System.setProperties - System.setProperty("hive.query.max.length", "100Mb"); - conf = new HiveConf(IDriver.class); setMetaStoreProperties(); diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java b/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java index 4b61ce153f..a74a3a880b 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java +++ b/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java @@ -51,20 +51,17 @@ import java.sql.SQLWarning; import java.util.ArrayList; import java.util.Base64; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Optional; /** - * The object used for executing a static SQL statement and returning the - * results it produces. + * HiveStatement. 
+ * */ public class HiveStatement implements java.sql.Statement { - - private static final Logger LOG = LoggerFactory.getLogger(HiveStatement.class); + public static final Logger LOG = LoggerFactory.getLogger(HiveStatement.class.getName()); public static final String QUERY_CANCELLED_MESSAGE = "Query was cancelled."; private static final int DEFAULT_FETCH_SIZE = @@ -74,10 +71,10 @@ private TCLIService.Iface client; private TOperationHandle stmtHandle = null; private final TSessionHandle sessHandle; - Map sessConf = new HashMap<>(); + Map sessConf = new HashMap(); private int fetchSize; private final int defaultFetchSize; - private final boolean isScrollableResultset; + private boolean isScrollableResultset = false; private boolean isOperationComplete = false; private boolean closeOnResultSetCompletion = false; /** @@ -121,9 +118,15 @@ */ private boolean isLogBeingGenerated = true; + /** + * Keep this state so we can know whether the statement is submitted to HS2 and start execution + * successfully. + */ + private boolean isExecuteStatementFailed = false; + private int queryTimeout = 0; - private Optional inPlaceUpdateStream; + private InPlaceUpdateStream inPlaceUpdateStream = InPlaceUpdateStream.NO_OP; public HiveStatement(HiveConnection connection, TCLIService.Iface client, TSessionHandle sessHandle) { @@ -143,14 +146,25 @@ public HiveStatement(HiveConnection connection, TCLIService.Iface client, TSessi this.isScrollableResultset = isScrollableResultset; this.defaultFetchSize = defaultFetchSize; this.fetchSize = (initFetchSize == 0) ? 
defaultFetchSize : initFetchSize; - this.inPlaceUpdateStream = Optional.empty(); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#addBatch(java.lang.String) + */ + @Override public void addBatch(String sql) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#cancel() + */ + @Override public void cancel() throws SQLException { checkConnection("cancel"); @@ -167,16 +181,28 @@ public void cancel() throws SQLException { } catch (SQLException e) { throw e; } catch (Exception e) { - throw new SQLException("Failed to cancel statement", "08S01", e); + throw new SQLException(e.toString(), "08S01", e); } isCancelled = true; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#clearBatch() + */ + @Override public void clearBatch() throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#clearWarnings() + */ + @Override public void clearWarnings() throws SQLException { warningChain = null; @@ -197,13 +223,14 @@ private void closeStatementIfNeeded() throws SQLException { } catch (SQLException e) { throw e; } catch (Exception e) { - throw new SQLException("Failed to close statement", "08S01", e); + throw new SQLException(e.toString(), "08S01", e); } } void closeClientOperation() throws SQLException { closeStatementIfNeeded(); isQueryClosed = true; + isExecuteStatementFailed = false; stmtHandle = null; } @@ -214,6 +241,11 @@ void closeOnResultSetCompletion() throws SQLException { } } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#close() + */ @Override public void close() throws SQLException { if (isClosed) { @@ -228,11 +260,17 @@ public void close() throws SQLException { isClosed = true; } - @Override + // JDK 1.7 public void closeOnCompletion() throws SQLException { closeOnResultSetCompletion = true; } + /* + * (non-Javadoc) + * + * @see 
java.sql.Statement#execute(java.lang.String) + */ + @Override public boolean execute(String sql) throws SQLException { runAsyncOnServer(sql); @@ -297,12 +335,15 @@ private void runAsyncOnServer(String sql) throws SQLException { TExecuteStatementResp execResp = client.ExecuteStatement(execReq); Utils.verifySuccessWithInfo(execResp.getStatus()); stmtHandle = execResp.getOperationHandle(); + isExecuteStatementFailed = false; } catch (SQLException eS) { + isExecuteStatementFailed = true; isLogBeingGenerated = false; throw eS; } catch (Exception ex) { + isExecuteStatementFailed = true; isLogBeingGenerated = false; - throw new SQLException("Failed to run async statement", "08S01", ex); + throw new SQLException(ex.toString(), "08S01", ex); } } @@ -315,12 +356,12 @@ private TGetOperationStatusResp waitForResultSetStatus() throws SQLException { TGetOperationStatusReq statusReq = new TGetOperationStatusReq(stmtHandle); TGetOperationStatusResp statusResp = null; - while (statusResp == null || !statusResp.isSetHasResultSet()) { + while(statusResp == null || !statusResp.isSetHasResultSet()) { try { statusResp = client.GetOperationStatus(statusReq); } catch (TException e) { isLogBeingGenerated = false; - throw new SQLException("Failed to wait for result set status", "08S01", e); + throw new SQLException(e.toString(), "08S01", e); } } @@ -328,15 +369,16 @@ private TGetOperationStatusResp waitForResultSetStatus() throws SQLException { } TGetOperationStatusResp waitForOperationToComplete() throws SQLException { - TGetOperationStatusResp statusResp = null; - - final TGetOperationStatusReq statusReq = new TGetOperationStatusReq(stmtHandle); - statusReq.setGetProgressUpdate(inPlaceUpdateStream.isPresent()); - - // Progress bar is completed if there is nothing to request - if (inPlaceUpdateStream.isPresent()) { - inPlaceUpdateStream.get().getEventNotifier().progressBarCompleted(); + TGetOperationStatusReq statusReq = new TGetOperationStatusReq(stmtHandle); + boolean 
shouldGetProgressUpdate = inPlaceUpdateStream != InPlaceUpdateStream.NO_OP; + statusReq.setGetProgressUpdate(shouldGetProgressUpdate); + if (!shouldGetProgressUpdate) { + /** + * progress bar is completed if there is nothing we want to request in the first place. + */ + inPlaceUpdateStream.getEventNotifier().progressBarCompleted(); } + TGetOperationStatusResp statusResp = null; // Poll on the operation status, till the operation is complete do { @@ -346,8 +388,8 @@ TGetOperationStatusResp waitForOperationToComplete() throws SQLException { * essentially return after the HIVE_SERVER2_LONG_POLLING_TIMEOUT (a server config) expires */ statusResp = client.GetOperationStatus(statusReq); - if (!isOperationComplete && inPlaceUpdateStream.isPresent()) { - inPlaceUpdateStream.get().update(statusResp.getProgressUpdateResponse()); + if(!isOperationComplete) { + inPlaceUpdateStream.update(statusResp.getProgressUpdateResponse()); } Utils.verifySuccessWithInfo(statusResp.getStatus()); if (statusResp.isSetOperationState()) { @@ -359,10 +401,12 @@ TGetOperationStatusResp waitForOperationToComplete() throws SQLException { break; case CANCELED_STATE: // 01000 -> warning - final String errMsg = statusResp.getErrorMessage(); - final String fullErrMsg = - (errMsg == null || errMsg.isEmpty()) ? 
QUERY_CANCELLED_MESSAGE : QUERY_CANCELLED_MESSAGE + " " + errMsg; - throw new SQLException(fullErrMsg, "01000"); + String errMsg = statusResp.getErrorMessage(); + if (errMsg != null && !errMsg.isEmpty()) { + throw new SQLException(QUERY_CANCELLED_MESSAGE + " " + errMsg, "01000"); + } else { + throw new SQLException(QUERY_CANCELLED_MESSAGE, "01000"); + } case TIMEDOUT_STATE: throw new SQLTimeoutException("Query timed out after " + queryTimeout + " seconds"); case ERROR_STATE: @@ -382,20 +426,20 @@ TGetOperationStatusResp waitForOperationToComplete() throws SQLException { throw e; } catch (Exception e) { isLogBeingGenerated = false; - throw new SQLException("Failed to wait for operation to complete", "08S01", e); + throw new SQLException(e.toString(), "08S01", e); } } while (!isOperationComplete); - // set progress bar to be completed when hive query execution has completed - if (inPlaceUpdateStream.isPresent()) { - inPlaceUpdateStream.get().getEventNotifier().progressBarCompleted(); - } + /* + we set progress bar to be completed when hive query execution has completed + */ + inPlaceUpdateStream.getEventNotifier().progressBarCompleted(); return statusResp; } private void checkConnection(String action) throws SQLException { if (isClosed) { - throw new SQLException("Cannot " + action + " after statement has been closed"); + throw new SQLException("Can't " + action + " after statement has been closed"); } } @@ -408,130 +452,269 @@ private void reInitState() throws SQLException { isCancelled = false; isQueryClosed = false; isLogBeingGenerated = true; + isExecuteStatementFailed = false; isOperationComplete = false; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#execute(java.lang.String, int) + */ + @Override public boolean execute(String sql, int autoGeneratedKeys) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#execute(java.lang.String, int[]) + */ + @Override 
public boolean execute(String sql, int[] columnIndexes) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#execute(java.lang.String, java.lang.String[]) + */ + @Override public boolean execute(String sql, String[] columnNames) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#executeBatch() + */ + @Override public int[] executeBatch() throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#executeQuery(java.lang.String) + */ + @Override public ResultSet executeQuery(String sql) throws SQLException { if (!execute(sql)) { - throw new SQLException("The query did not generate a result set"); + throw new SQLException("The query did not generate a result set!"); } return resultSet; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#executeUpdate(java.lang.String) + */ + @Override public int executeUpdate(String sql) throws SQLException { execute(sql); return getUpdateCount(); + //return getLargeUpdateCount(); - not currently implemented... 
wrong type } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#executeUpdate(java.lang.String, int) + */ + @Override public int executeUpdate(String sql, int autoGeneratedKeys) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#executeUpdate(java.lang.String, int[]) + */ + @Override public int executeUpdate(String sql, int[] columnIndexes) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#executeUpdate(java.lang.String, java.lang.String[]) + */ + @Override public int executeUpdate(String sql, String[] columnNames) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getConnection() + */ + @Override public Connection getConnection() throws SQLException { checkConnection("getConnection"); return this.connection; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getFetchDirection() + */ + @Override public int getFetchDirection() throws SQLException { checkConnection("getFetchDirection"); return ResultSet.FETCH_FORWARD; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getFetchSize() + */ + @Override public int getFetchSize() throws SQLException { checkConnection("getFetchSize"); return fetchSize; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getGeneratedKeys() + */ + @Override public ResultSet getGeneratedKeys() throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getMaxFieldSize() + */ + @Override public int getMaxFieldSize() throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getMaxRows() + */ + @Override public int getMaxRows() throws SQLException { 
checkConnection("getMaxRows"); return maxRows; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getMoreResults() + */ + @Override public boolean getMoreResults() throws SQLException { return false; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getMoreResults(int) + */ + @Override public boolean getMoreResults(int current) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getQueryTimeout() + */ + @Override public int getQueryTimeout() throws SQLException { checkConnection("getQueryTimeout"); - return this.queryTimeout; + return 0; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getResultSet() + */ + @Override public ResultSet getResultSet() throws SQLException { checkConnection("getResultSet"); return resultSet; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getResultSetConcurrency() + */ + @Override public int getResultSetConcurrency() throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getResultSetHoldability() + */ + @Override public int getResultSetHoldability() throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getResultSetType() + */ + @Override public int getResultSetType() throws SQLException { checkConnection("getResultSetType"); return ResultSet.TYPE_FORWARD_ONLY; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getUpdateCount() + */ @Override public int getUpdateCount() throws SQLException { checkConnection("getUpdateCount"); @@ -540,44 +723,74 @@ public int getUpdateCount() throws SQLException { * client might end up using executeAsync and then call this to check if the query run is * finished. 
*/ - long numModifiedRows = -1L; + long numModifiedRows = -1; TGetOperationStatusResp resp = waitForOperationToComplete(); if (resp != null) { numModifiedRows = resp.getNumModifiedRows(); } - if (numModifiedRows == -1L || numModifiedRows > Integer.MAX_VALUE) { - LOG.warn("Invalid number of updated rows: {}", numModifiedRows); + if (numModifiedRows == -1 || numModifiedRows > Integer.MAX_VALUE) { + LOG.warn("Number of rows is greater than Integer.MAX_VALUE"); return -1; } return (int) numModifiedRows; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#getWarnings() + */ + @Override public SQLWarning getWarnings() throws SQLException { checkConnection("getWarnings"); return warningChain; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#isClosed() + */ + @Override public boolean isClosed() throws SQLException { return isClosed; } - @Override + // JDK 1.7 public boolean isCloseOnCompletion() throws SQLException { return closeOnResultSetCompletion; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#isPoolable() + */ + @Override public boolean isPoolable() throws SQLException { return false; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#setCursorName(java.lang.String) + */ + @Override public void setCursorName(String name) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#setEscapeProcessing(boolean) + */ + @Override public void setEscapeProcessing(boolean enable) throws SQLException { if (enable) { @@ -585,14 +798,26 @@ public void setEscapeProcessing(boolean enable) throws SQLException { } } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#setFetchDirection(int) + */ + @Override public void setFetchDirection(int direction) throws SQLException { checkConnection("setFetchDirection"); if (direction != ResultSet.FETCH_FORWARD) { - throw new SQLException("Not supported direction: " + direction); + throw new SQLException("Not supported direction 
" + direction); } } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#setFetchSize(int) + */ + @Override public void setFetchSize(int rows) throws SQLException { checkConnection("setFetchSize"); @@ -605,35 +830,71 @@ public void setFetchSize(int rows) throws SQLException { } } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#setMaxFieldSize(int) + */ + @Override public void setMaxFieldSize(int max) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#setMaxRows(int) + */ + @Override public void setMaxRows(int max) throws SQLException { checkConnection("setMaxRows"); if (max < 0) { - throw new SQLException("Maximum number of rows must be >= 0"); + throw new SQLException("max must be >= 0"); } maxRows = max; } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#setPoolable(boolean) + */ + @Override public void setPoolable(boolean poolable) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } + /* + * (non-Javadoc) + * + * @see java.sql.Statement#setQueryTimeout(int) + */ + @Override public void setQueryTimeout(int seconds) throws SQLException { this.queryTimeout = seconds; } + /* + * (non-Javadoc) + * + * @see java.sql.Wrapper#isWrapperFor(java.lang.Class) + */ + @Override public boolean isWrapperFor(Class iface) throws SQLException { return false; } + /* + * (non-Javadoc) + * + * @see java.sql.Wrapper#unwrap(java.lang.Class) + */ + @Override public T unwrap(Class iface) throws SQLException { throw new SQLException("Cannot unwrap to " + iface); @@ -683,6 +944,7 @@ public boolean hasMoreLogs() { "statement has been closed or cancelled."); } + List logs = new ArrayList(); TFetchResultsResp tFetchResultsResp = null; try { if (stmtHandle != null) { @@ -696,30 +958,36 @@ public boolean hasMoreLogs() { throw new ClosedOrCancelledStatementException("Method getQueryLog() failed. 
The " + "statement has been closed or cancelled."); } else { - return Collections.emptyList(); + return logs; } } } catch (SQLException e) { throw e; + } catch (TException e) { + throw new SQLException("Error when getting query log: " + e, e); } catch (Exception e) { - throw new SQLException("Error when getting query log", e); + throw new SQLException("Error when getting query log: " + e, e); } - final List logs = new ArrayList<>(); try { - final RowSet rowSet = RowSetFactory.create(tFetchResultsResp.getResults(), connection.getProtocol()); + RowSet rowSet; + rowSet = RowSetFactory.create(tFetchResultsResp.getResults(), connection.getProtocol()); for (Object[] row : rowSet) { logs.add(String.valueOf(row[0])); } } catch (TException e) { - throw new SQLException("Error building result set for query log", e); + throw new SQLException("Error building result set for query log: " + e, e); } - return Collections.unmodifiableList(logs); + return logs; } private TFetchOrientation getFetchOrientation(boolean incremental) { - return (incremental) ? TFetchOrientation.FETCH_NEXT : TFetchOrientation.FETCH_FIRST; + if (incremental) { + return TFetchOrientation.FETCH_NEXT; + } else { + return TFetchOrientation.FETCH_FIRST; + } } /** @@ -738,18 +1006,17 @@ public String getYarnATSGuid() { } /** - * This is only used by the beeline client to set the stream on which in place - * progress updates are to be shown. + * This is only used by the beeline client to set the stream on which in place progress updates + * are to be shown */ public void setInPlaceUpdateStream(InPlaceUpdateStream stream) { - this.inPlaceUpdateStream = Optional.ofNullable(stream); + this.inPlaceUpdateStream = stream; } /** - * Returns the Query ID if it is running. This method is a public API for - * usage outside of Hive, although it is not part of the interface - * java.sql.Statement. - * + * Returns the Query ID if it is running. 
+ * This method is a public API for usage outside of Hive, although it is not part of the + * interface java.sql.Statement. * @return Valid query ID if it is running else returns NULL. * @throws SQLException If any internal failures. */ @@ -763,7 +1030,7 @@ public String getQueryId() throws SQLException { return null; } try { - final String queryId = client.GetQueryId(new TGetQueryIdReq(stmtHandleTmp)).getQueryId(); + String queryId = client.GetQueryId(new TGetQueryIdReq(stmtHandleTmp)).getQueryId(); // queryId can be empty string if query was already closed. Need to return null in such case. return StringUtils.isBlank(queryId) ? null : queryId; diff --git a/jdbc/src/java/org/apache/hive/jdbc/logs/InPlaceUpdateStream.java b/jdbc/src/java/org/apache/hive/jdbc/logs/InPlaceUpdateStream.java index 310c4969d2..cb5fb82214 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/logs/InPlaceUpdateStream.java +++ b/jdbc/src/java/org/apache/hive/jdbc/logs/InPlaceUpdateStream.java @@ -24,6 +24,20 @@ public interface InPlaceUpdateStream { void update(TProgressUpdateResp response); + InPlaceUpdateStream NO_OP = new InPlaceUpdateStream() { + private final EventNotifier eventNotifier = new EventNotifier(); + @Override + public void update(TProgressUpdateResp response) { + + } + + @Override + public EventNotifier getEventNotifier() { + return eventNotifier; + } + + }; + EventNotifier getEventNotifier(); class EventNotifier { diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java index 9cb8bc9449..565afdc90b 100644 --- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java +++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java @@ -813,8 +813,8 @@ public void run() { Futures.addCallback(schedulerFuture, new LoggingFutureCallback("SchedulerThread", LOG)); registry.start(); - 
activeInstances = registry.getInstances(); registry.registerStateChangeListener(new NodeStateChangeListener()); + activeInstances = registry.getInstances(); for (LlapServiceInstance inst : activeInstances.getAll()) { registerAndAddNode(new NodeInfo(inst, nodeBlacklistConf, clock, numSchedulableTasksPerNode, metrics), inst); diff --git a/llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskCommunicator.java b/llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskCommunicator.java index 3bbbdf3fff..9b4ac271f0 100644 --- a/llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskCommunicator.java +++ b/llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskCommunicator.java @@ -66,7 +66,7 @@ public class TestLlapTaskCommunicator { - @Test (timeout = 30000) + @Test (timeout = 5000) public void testEntityTracker1() { LlapTaskCommunicator.EntityTracker entityTracker = new LlapTaskCommunicator.EntityTracker(); @@ -121,7 +121,7 @@ public void testEntityTracker1() { } - @Test(timeout = 30000) + @Test(timeout = 5000) public void testFinishableStateUpdateFailure() throws Exception { LlapTaskCommunicatorWrapperForTest wrapper = null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetLocationDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetLocationDesc.java index ddb320692a..16d28f2aa5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetLocationDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetLocationDesc.java @@ -29,15 +29,31 @@ public class AlterDatabaseSetLocationDesc extends AbstractAlterDatabaseDesc { private static final long serialVersionUID = 1L; - private final String location; + private String location = null; + private String managedLocation = null; public AlterDatabaseSetLocationDesc(String databaseName, String location) { + 
this(databaseName, location,null); + } + + public AlterDatabaseSetLocationDesc(String databaseName, String location, String managedLocation) { super(databaseName, null); - this.location = location; + if (location != null) { + this.location = location; + } + + if (managedLocation != null) { + this.managedLocation = managedLocation; + } } @Explain(displayName="location") public String getLocation() { return location; } + + @Explain(displayName="managedLocation") + public String getManagedLocation() { + return managedLocation; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetLocationOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetLocationOperation.java index 949c9aece5..0c4ade3538 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetLocationOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetLocationOperation.java @@ -23,12 +23,10 @@ import java.util.Map; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.ddl.DDLOperationContext; import org.apache.hadoop.hive.ql.ddl.database.alter.AbstractAlterDatabaseOperation; -import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.metadata.HiveException; /** @@ -42,23 +40,34 @@ public AlterDatabaseSetLocationOperation(DDLOperationContext context, AlterDatab @Override protected void doAlteration(Database database, Map params) throws HiveException { try { - String newLocation = Utilities.getQualifiedPath(context.getConf(), new Path(desc.getLocation())); + String newLocation = desc.getLocation(); + if (newLocation != null) { + URI locationURI = new URI(newLocation); + if (!locationURI.isAbsolute() || StringUtils.isBlank(locationURI.getScheme())) { + throw new 
HiveException(ErrorMsg.BAD_LOCATION_VALUE, newLocation); + } - if (newLocation.equalsIgnoreCase(database.getManagedLocationUri())) { - throw new HiveException("Managed and external locations for database cannot be the same"); + if (newLocation.equals(database.getLocationUri())) { + LOG.info("AlterDatabase skipped. No change in location."); + } else { + database.setLocationUri(newLocation); + } + return; } - URI locationURI = new URI(newLocation); - if (!locationURI.isAbsolute() || StringUtils.isBlank(locationURI.getScheme())) { - throw new HiveException(ErrorMsg.BAD_LOCATION_VALUE, newLocation); - } + newLocation = desc.getManagedLocation(); + if (newLocation != null) { + URI locationURI = new URI(newLocation); + if (!locationURI.isAbsolute() || StringUtils.isBlank(locationURI.getScheme())) { + throw new HiveException(ErrorMsg.BAD_LOCATION_VALUE, newLocation); + } - if (newLocation.equals(database.getLocationUri())) { - LOG.info("AlterDatabase skipped. No change in location."); - } else { - database.setLocationUri(newLocation); + if (newLocation.equals(database.getManagedLocationUri())) { + LOG.info("AlterDatabase skipped. No change in location."); + } else { + database.setManagedLocationUri(newLocation); + } } - return; } catch (URISyntaxException e) { throw new HiveException(e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetManagedLocationAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetManagedLocationAnalyzer.java index db7a3ba1b2..a0e92eb3ef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetManagedLocationAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetManagedLocationAnalyzer.java @@ -26,7 +26,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; /** - * Analyzer for database set managed location commands. + * Analyzer for database set location commands. 
*/ @DDLType(types = HiveParser.TOK_ALTERDATABASE_MANAGEDLOCATION) public class AlterDatabaseSetManagedLocationAnalyzer extends AbstractAlterDatabaseAnalyzer { @@ -41,7 +41,7 @@ public void analyzeInternal(ASTNode root) throws SemanticException { outputs.add(toWriteEntity(newLocation)); - AlterDatabaseSetManagedLocationDesc desc = new AlterDatabaseSetManagedLocationDesc(databaseName, newLocation); + AlterDatabaseSetLocationDesc desc = new AlterDatabaseSetLocationDesc(databaseName, null, newLocation); addAlterDatabaseDesc(desc); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetManagedLocationDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetManagedLocationDesc.java deleted file mode 100644 index fc43583882..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetManagedLocationDesc.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.ddl.database.alter.location; - -import org.apache.hadoop.hive.ql.ddl.database.alter.AbstractAlterDatabaseDesc; -import org.apache.hadoop.hive.ql.plan.Explain; -import org.apache.hadoop.hive.ql.plan.Explain.Level; - -/** - * DDL task description for ALTER DATABASE ... SET MANAGEDLOCATION ... commands. - */ -@Explain(displayName = "Set Database Managed Location", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) -public class AlterDatabaseSetManagedLocationDesc extends AbstractAlterDatabaseDesc { - private static final long serialVersionUID = 1L; - - private final String managedLocation; - - public AlterDatabaseSetManagedLocationDesc(String databaseName, String managedLocation) { - super(databaseName, null); - this.managedLocation = managedLocation; - } - - @Explain(displayName="managed location") - public String getManagedLocation() { - return managedLocation; - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetManagedLocationOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetManagedLocationOperation.java deleted file mode 100644 index 64a6a8ffbf..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/alter/location/AlterDatabaseSetManagedLocationOperation.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.ddl.database.alter.location; - -import java.net.URI; -import java.net.URISyntaxException; -import java.util.Map; - -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.ddl.DDLOperationContext; -import org.apache.hadoop.hive.ql.ddl.database.alter.AbstractAlterDatabaseOperation; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.metadata.HiveException; - -/** - * Operation process of altering a database's managed location. 
- */ -public class AlterDatabaseSetManagedLocationOperation - extends AbstractAlterDatabaseOperation { - public AlterDatabaseSetManagedLocationOperation(DDLOperationContext context, - AlterDatabaseSetManagedLocationDesc desc) { - super(context, desc); - } - - @Override - protected void doAlteration(Database database, Map params) throws HiveException { - try { - String newManagedLocation = Utilities.getQualifiedPath(context.getConf(), new Path(desc.getManagedLocation())); - - if (database.getLocationUri().equalsIgnoreCase(newManagedLocation)) { - throw new HiveException("Managed and external locations for database cannot be the same"); - } - - URI locationURI = new URI(newManagedLocation); - if (!locationURI.isAbsolute() || StringUtils.isBlank(locationURI.getScheme())) { - throw new HiveException(ErrorMsg.BAD_LOCATION_VALUE, newManagedLocation); - } - - if (newManagedLocation.equals(database.getManagedLocationUri())) { - LOG.info("AlterDatabase skipped. No change in location."); - } else { - database.setManagedLocationUri(newManagedLocation); - } - } catch (URISyntaxException e) { - throw new HiveException(e); - } - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseAnalyzer.java index 0cf58ee109..f87dd2a64e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseAnalyzer.java @@ -46,8 +46,7 @@ public void analyzeInternal(ASTNode root) throws SemanticException { boolean ifNotExists = false; String comment = null; - String locationUri = null; - String managedLocationUri = null; + String locationUri = null, managedLocationUri = null; Map props = null; for (int i = 1; i < root.getChildCount(); i++) { @@ -75,14 +74,12 @@ public void analyzeInternal(ASTNode root) throws SemanticException { } } - CreateDatabaseDesc desc = new 
CreateDatabaseDesc(databaseName, comment, locationUri, managedLocationUri, - ifNotExists, props); + CreateDatabaseDesc desc = new CreateDatabaseDesc(databaseName, comment, locationUri, ifNotExists, props, managedLocationUri); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc))); Database database = new Database(databaseName, comment, locationUri, props); - if (managedLocationUri != null) { + if (managedLocationUri != null) database.setManagedLocationUri(managedLocationUri); - } outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_NO_LOCK)); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseDesc.java index 167672b2b0..f3959f0b2a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseDesc.java @@ -39,14 +39,19 @@ private final boolean ifNotExists; private final Map dbProperties; - public CreateDatabaseDesc(String databaseName, String comment, String locationUri, String managedLocationUri, - boolean ifNotExists, Map dbProperties) { + public CreateDatabaseDesc(String databaseName, String comment, String locationUri, boolean ifNotExists, + Map dbProperties) { + this(databaseName, comment, locationUri, ifNotExists, dbProperties, null); + } + + public CreateDatabaseDesc(String databaseName, String comment, String locationUri, boolean ifNotExists, + Map dbProperties, String managedLocationUri) { this.databaseName = databaseName; this.comment = comment; this.locationUri = locationUri; - this.managedLocationUri = managedLocationUri; this.ifNotExists = ifNotExists; this.dbProperties = dbProperties; + this.managedLocationUri = managedLocationUri; } @Explain(displayName="if not exists", displayOnlyOnTrue = true) @@ -73,7 +78,7 @@ public String getLocationUri() { return locationUri; } - 
@Explain(displayName="managed location uri") + @Explain(displayName="managedLocationUri") public String getManagedLocationUri() { return managedLocationUri; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseOperation.java index db06161296..444db0a8b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseOperation.java @@ -42,19 +42,20 @@ public CreateDatabaseOperation(DDLOperationContext context, CreateDatabaseDesc d @Override public int execute() throws HiveException { - Database database = new Database(desc.getName(), desc.getComment(), desc.getLocationUri(), - desc.getDatabaseProperties()); + Database database = new Database(); + database.setName(desc.getName()); + database.setDescription(desc.getComment()); + database.setLocationUri(desc.getLocationUri()); + database.setParameters(desc.getDatabaseProperties()); database.setOwnerName(SessionState.getUserFromAuthenticator()); database.setOwnerType(PrincipalType.USER); - if (desc.getManagedLocationUri() != null) { + if (desc.getManagedLocationUri() != null) database.setManagedLocationUri(desc.getManagedLocationUri()); - } try { makeLocationQualified(database); - if (database.getLocationUri().equalsIgnoreCase(database.getManagedLocationUri())) { + if (database.getLocationUri().equalsIgnoreCase(database.getManagedLocationUri())) throw new HiveException("Managed and external locations for database cannot be the same"); - } context.getDb().createDatabase(database, desc.getIfNotExists()); } catch (AlreadyExistsException ex) { //it would be better if AlreadyExistsException had an errorCode field.... 
@@ -72,9 +73,8 @@ private void makeLocationQualified(Database database) throws HiveException { String rootDir = MetastoreConf.getVar(context.getConf(), MetastoreConf.ConfVars.WAREHOUSE_EXTERNAL); if (rootDir == null || rootDir.trim().isEmpty()) { // Fallback plan - LOG.warn(String.format( - "%s is not set, falling back to %s. This could cause external tables to use to managed tablespace.", - MetastoreConf.ConfVars.WAREHOUSE_EXTERNAL.getVarname(), MetastoreConf.ConfVars.WAREHOUSE.getVarname())); + LOG.warn(MetastoreConf.ConfVars.WAREHOUSE_EXTERNAL.getVarname() + " is not set, falling back to " + + MetastoreConf.ConfVars.WAREHOUSE.getVarname() + ". This could cause external tables to use to managed tablespace."); rootDir = MetastoreConf.getVar(context.getConf(), MetastoreConf.ConfVars.WAREHOUSE); } Path path = new Path(rootDir, database.getName().toLowerCase() + DATABASE_PATH_SUFFIX); @@ -84,8 +84,7 @@ private void makeLocationQualified(Database database) throws HiveException { if (database.isSetManagedLocationUri()) { // TODO should we enforce a location check here? 
- database.setManagedLocationUri(Utilities.getQualifiedPath(context.getConf(), - new Path(database.getManagedLocationUri()))); + database.setManagedLocationUri(Utilities.getQualifiedPath(context.getConf(), new Path(database.getManagedLocationUri()))); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/desc/DescDatabaseDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/desc/DescDatabaseDesc.java index b92ed21cad..be0e5a963c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/desc/DescDatabaseDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/desc/DescDatabaseDesc.java @@ -33,17 +33,16 @@ private static final long serialVersionUID = 1L; public static final String DESC_DATABASE_SCHEMA = - "db_name,comment,location,managedLocation,owner_name,owner_type,parameters#" + - "string:string:string:string:string:string:string"; + "db_name,comment,location,managedLocation,owner_name,owner_type,parameters#string:string:string:string:string:string:string"; private final String resFile; private final String dbName; private final boolean isExt; public DescDatabaseDesc(Path resFile, String dbName, boolean isExt) { + this.isExt = isExt; this.resFile = resFile.toString(); this.dbName = dbName; - this.isExt = isExt; } public boolean isExt() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/drop/DropDatabaseOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/drop/DropDatabaseOperation.java index 74ac07db8f..a116a0e414 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/drop/DropDatabaseOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/drop/DropDatabaseOperation.java @@ -52,7 +52,8 @@ public int execute() throws HiveException { context.getDb().dropDatabase(dbName, true, desc.getIfExists(), desc.isCasdade()); if (LlapHiveUtils.isLlapMode(context.getConf())) { - ProactiveEviction.Request.Builder llapEvictRequestBuilder = ProactiveEviction.Request.Builder.create(); + 
ProactiveEviction.Request.Builder llapEvictRequestBuilder = + ProactiveEviction.Request.Builder.create(); llapEvictRequestBuilder.addDb(dbName); ProactiveEviction.evict(context.getConf(), llapEvictRequestBuilder.build()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/show/ShowCreateTableAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/show/ShowCreateTableAnalyzer.java index 7e5dd41fbf..b362837439 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/show/ShowCreateTableAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/show/ShowCreateTableAnalyzer.java @@ -54,10 +54,12 @@ public void analyzeInternal(ASTNode root) throws SemanticException { // If no DB was specified in statement, do not include it in the final output ShowCreateTableDesc desc = new ShowCreateTableDesc(table.getDbName(), table.getTableName(), ctx.getResFile().toString(), StringUtils.isBlank(tableIdentifier.getKey())); + Task task = TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)); + task.setFetchSource(true); + rootTasks.add(task); - task.setFetchSource(true); setFetchTask(createFetchTask(ShowCreateTableDesc.SCHEMA)); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/show/ShowCreateTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/show/ShowCreateTableOperation.java index e6b1bae114..bf913442f9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/show/ShowCreateTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/show/ShowCreateTableOperation.java @@ -103,7 +103,7 @@ public int execute() throws HiveException { } private static final String CREATE_VIEW_TEMPLATE = - "CREATE VIEW `<" + DATABASE_NAME + ">`.`<" + TABLE_NAME + ">` AS "; + "CREATE VIEW `<" + DATABASE_NAME + ">`." 
+ "`<" + TABLE_NAME + ">`" + " AS "; private String getCreateViewCommand(Table table, boolean isRelative) { ST command = new ST(CREATE_VIEW_TEMPLATE); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/drop/DropTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/drop/DropTableOperation.java index 8518b844d7..72b694f668 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/drop/DropTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/drop/DropTableOperation.java @@ -108,7 +108,8 @@ public int execute() throws HiveException { if (LlapHiveUtils.isLlapMode(context.getConf())) { TableName tableName = HiveTableName.of(table); - ProactiveEviction.Request.Builder llapEvictRequestBuilder = ProactiveEviction.Request.Builder.create(); + ProactiveEviction.Request.Builder llapEvictRequestBuilder = + ProactiveEviction.Request.Builder.create(); llapEvictRequestBuilder.addTable(tableName.getDb(), tableName.getTable()); ProactiveEviction.evict(context.getConf(), llapEvictRequestBuilder.build()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/drop/AlterTableDropPartitionOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/drop/AlterTableDropPartitionOperation.java index 06f083c012..ae2c341c19 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/drop/AlterTableDropPartitionOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/drop/AlterTableDropPartitionOperation.java @@ -120,8 +120,9 @@ private void dropPartitions() throws HiveException { List droppedPartitions = context.getDb().dropPartitions(tablenName.getDb(), tablenName.getTable(), partitionExpressions, options); - ProactiveEviction.Request.Builder llapEvictRequestBuilder = LlapHiveUtils.isLlapMode(context.getConf()) ? - ProactiveEviction.Request.Builder.create() : null; + ProactiveEviction.Request.Builder llapEvictRequestBuilder = + LlapHiveUtils.isLlapMode(context.getConf()) ? 
+ ProactiveEviction.Request.Builder.create() : null; for (Partition partition : droppedPartitions) { context.getConsole().printInfo("Dropped the partition " + partition.getName()); @@ -132,7 +133,6 @@ private void dropPartitions() throws HiveException { llapEvictRequestBuilder.addPartitionOfATable(tablenName.getDb(), tablenName.getTable(), partition.getSpec()); } } - if (llapEvictRequestBuilder != null) { ProactiveEviction.evict(context.getConf(), llapEvictRequestBuilder.build()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java index d1d0e3cbe6..4fb53785c2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.ddl.view.materialized.alter.rebuild; +import org.antlr.runtime.tree.Tree; import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.metastore.api.LockState; import org.apache.hadoop.hive.ql.Context; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java index 88653805aa..eec90c630b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java @@ -18,28 +18,21 @@ package org.apache.hadoop.hive.ql.exec; -import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; - -import org.apache.calcite.jdbc.JavaTypeFactoryImpl; -import org.apache.calcite.rel.type.RelDataType; import 
org.apache.calcite.rel.type.RelDataTypeImpl; import org.apache.calcite.rel.type.RelProtoDataType; import org.apache.calcite.sql.SqlFunction; -import org.apache.calcite.sql.SqlFunctionCategory; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.type.InferTypes; import org.apache.calcite.sql.type.OperandTypes; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveMergeableAggregate; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSqlFunction; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hive.plugin.api.HiveUDFPlugin; @@ -55,9 +48,9 @@ private static final String DATASKETCHES_PREFIX = "ds"; - public static final String DATA_TO_SKETCH = "sketch"; - public static final String SKETCH_TO_ESTIMATE = "estimate"; + private static final String DATA_TO_SKETCH = "sketch"; private static final String SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS = "estimate_bounds"; + private static final String SKETCH_TO_ESTIMATE = "estimate"; private static final String SKETCH_TO_STRING = "stringify"; private static final String UNION_SKETCH = "union"; private static final String UNION_SKETCH1 = "union_f"; @@ -80,12 +73,12 @@ private static final String SKETCH_TO_VARIANCES = "variances"; private static final String SKETCH_TO_PERCENTILE = "percentile"; - private final Map sketchClasses; + private final List sketchClasses; private final ArrayList descriptors; private DataSketchesFunctions() { - this.sketchClasses = new HashMap<>(); - this.descriptors = new ArrayList<>(); + this.sketchClasses = new ArrayList(); + this.descriptors = new ArrayList(); registerHll(); registerCpc(); registerKll(); @@ -103,31 +96,19 @@ private DataSketchesFunctions() { return descriptors; } - public 
SketchFunctionDescriptor getSketchFunction(String className, String function) { - if (!sketchClasses.containsKey(className)) { - throw new IllegalArgumentException(String.format("Sketch-class '%s' doesn't exists", className)); - } - SketchDescriptor sc = sketchClasses.get(className); - if (!sc.fnMap.containsKey(function)) { - throw new IllegalArgumentException(String.format("The Sketch-class '%s' doesn't have a '%s' method", function)); - } - return sketchClasses.get(className).fnMap.get(function); - } - private void buildDescritors() { - for (SketchDescriptor sketchDescriptor : sketchClasses.values()) { + for (SketchDescriptor sketchDescriptor : sketchClasses) { descriptors.addAll(sketchDescriptor.fnMap.values()); } } private void buildCalciteFns() { - for (SketchDescriptor sd : sketchClasses.values()) { + for (SketchDescriptor sd : sketchClasses) { // Mergability is exposed to Calcite; which enables to use it during rollup. RelProtoDataType sketchType = RelDataTypeImpl.proto(SqlTypeName.BINARY, true); SketchFunctionDescriptor sketchSFD = sd.fnMap.get(DATA_TO_SKETCH); SketchFunctionDescriptor unionSFD = sd.fnMap.get(UNION_SKETCH); - SketchFunctionDescriptor estimateSFD = sd.fnMap.get(SKETCH_TO_ESTIMATE); if (sketchSFD == null || unionSFD == null) { continue; @@ -147,27 +128,14 @@ private void buildCalciteFns() { OperandTypes.family(), unionFn); - unionSFD.setCalciteFunction(unionFn); sketchSFD.setCalciteFunction(sketchFn); - if (estimateSFD != null && estimateSFD.getReturnRelDataType().isPresent()) { - - SqlFunction estimateFn = new HiveSqlFunction(estimateSFD.name, - SqlKind.OTHER_FUNCTION, - ReturnTypes.explicit(estimateSFD.getReturnRelDataType().get().getSqlTypeName()), - InferTypes.ANY_NULLABLE, - OperandTypes.family(), - SqlFunctionCategory.USER_DEFINED_FUNCTION, - true, - false); - - estimateSFD.setCalciteFunction(estimateFn); - } } } + private void registerHiveFunctionsInternal(Registry system) { - for (SketchDescriptor sketchDescriptor : 
sketchClasses.values()) { + for (SketchDescriptor sketchDescriptor : sketchClasses) { Collection functions = sketchDescriptor.fnMap.values(); for (SketchFunctionDescriptor fn : functions) { if (UDF.class.isAssignableFrom(fn.udfClass)) { @@ -197,7 +165,6 @@ private void registerHiveFunctionsInternal(Registry system) { String name; Class udfClass; private SqlFunction calciteFunction; - private Class returnType; public SketchFunctionDescriptor(String name, Class udfClass) { this.name = name; @@ -214,19 +181,6 @@ public String getFunctionName() { return name; } - public Optional getReturnRelDataType() { - if (returnType == null) { - return Optional.empty(); - } else { - JavaTypeFactoryImpl typeFactory = new JavaTypeFactoryImpl(new HiveTypeSystemImpl()); - return Optional.of(typeFactory.createType(returnType)); - } - } - - public void setReturnType(Class returnType) { - this.returnType = returnType; - } - @Override public Optional getCalciteFunction() { return Optional.ofNullable(calciteFunction); @@ -235,11 +189,6 @@ public void setReturnType(Class returnType) { public void setCalciteFunction(SqlFunction calciteFunction) { this.calciteFunction = calciteFunction; } - - @Override - public String toString() { - return getClass().getCanonicalName() + "[" + name + "]"; - } } private static class SketchDescriptor { @@ -252,28 +201,7 @@ public SketchDescriptor(String string) { } private void register(String name, Class clazz) { - SketchFunctionDescriptor value = new SketchFunctionDescriptor(functionPrefix + name, clazz); - if (UDF.class.isAssignableFrom(clazz)) { - Optional evaluateMethod = getEvaluateMethod(clazz); - if (evaluateMethod.isPresent()) { - value.setReturnType(evaluateMethod.get().getReturnType()); - } - } - fnMap.put(name, value); - } - - private Optional getEvaluateMethod(Class clazz) { - List evaluateMethods = new ArrayList(); - for (Method method : clazz.getMethods()) { - if ("evaluate".equals(method.getName())) { - evaluateMethods.add(method); - } - } - if 
(evaluateMethods.size() > 0) { - return Optional.of(evaluateMethods.get(0)); - } else { - return Optional.empty(); - } + fnMap.put(name, new SketchFunctionDescriptor(functionPrefix + name, clazz)); } } @@ -286,7 +214,7 @@ private void registerHll() { sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.hll.SketchToStringUDF.class); sd.register(UNION_SKETCH1, org.apache.datasketches.hive.hll.UnionSketchUDF.class); sd.register(UNION_SKETCH, org.apache.datasketches.hive.hll.UnionSketchUDAF.class); - sketchClasses.put("hll", sd); + sketchClasses.add(sd); } private void registerCpc() { @@ -300,7 +228,7 @@ private void registerCpc() { sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.cpc.SketchToStringUDF.class); sd.register(UNION_SKETCH1, org.apache.datasketches.hive.cpc.UnionSketchUDF.class); sd.register(UNION_SKETCH, org.apache.datasketches.hive.cpc.UnionSketchUDAF.class); - sketchClasses.put("cpc", sd); + sketchClasses.add(sd); } private void registerKll() { @@ -316,7 +244,7 @@ private void registerKll() { sd.register(GET_QUANTILES, org.apache.datasketches.hive.kll.GetQuantilesUDF.class); sd.register(GET_QUANTILE, org.apache.datasketches.hive.kll.GetQuantileUDF.class); sd.register(GET_RANK, org.apache.datasketches.hive.kll.GetRankUDF.class); - sketchClasses.put("kll", sd); + sketchClasses.add(sd); } private void registerTheta() { @@ -330,7 +258,7 @@ private void registerTheta() { sd.register(INTERSECT_SKETCH, org.apache.datasketches.hive.theta.IntersectSketchUDAF.class); sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.theta.EstimateSketchUDF.class); sd.register(EXCLUDE_SKETCH, org.apache.datasketches.hive.theta.ExcludeSketchUDF.class); - sketchClasses.put("theta", sd); + sketchClasses.add(sd); } @@ -356,7 +284,7 @@ private void registerTupleArrayOfDoubles() { org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToQuantilesSketchUDF.class); sd.register(SKETCH_TO_VALUES, 
org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToValuesUDTF.class); sd.register(SKETCH_TO_VARIANCES, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToVariancesUDF.class); - sketchClasses.put("tuple_arrayofdouble", sd); + sketchClasses.add(sd); } private void registerTupleDoubleSummary() { @@ -366,7 +294,7 @@ private void registerTupleDoubleSummary() { sd.register(UNION_SKETCH, org.apache.datasketches.hive.tuple.UnionDoubleSummarySketchUDAF.class); sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.tuple.DoubleSummarySketchToEstimatesUDF.class); sd.register(SKETCH_TO_PERCENTILE, org.apache.datasketches.hive.tuple.DoubleSummarySketchToPercentileUDF.class); - sketchClasses.put("tuple_doublesummary", sd); + sketchClasses.add(sd); } private void registerQuantiles() { @@ -384,7 +312,7 @@ private void registerFrequencies() { sd.register(UNION_SKETCH, org.apache.datasketches.hive.frequencies.UnionStringsSketchUDAF.class); sd.register(GET_FREQUENT_ITEMS, org.apache.datasketches.hive.frequencies.GetFrequentItemsFromStringsSketchUDTF.class); - sketchClasses.put("freq", sd); + sketchClasses.add(sd); } private void registerQuantilesString() { @@ -399,7 +327,7 @@ private void registerQuantilesString() { sd.register(GET_PMF, org.apache.datasketches.hive.quantiles.GetPmfFromStringsSketchUDF.class); sd.register(GET_QUANTILE, org.apache.datasketches.hive.quantiles.GetQuantileFromStringsSketchUDF.class); sd.register(GET_QUANTILES, org.apache.datasketches.hive.quantiles.GetQuantilesFromStringsSketchUDF.class); - sketchClasses.put("quantile_strings", sd); + sketchClasses.add(sd); } private void registerQuantilesDoubles() { @@ -414,7 +342,7 @@ private void registerQuantilesDoubles() { sd.register(GET_PMF, org.apache.datasketches.hive.quantiles.GetPmfFromDoublesSketchUDF.class); sd.register(GET_QUANTILE, org.apache.datasketches.hive.quantiles.GetQuantileFromDoublesSketchUDF.class); sd.register(GET_QUANTILES, 
org.apache.datasketches.hive.quantiles.GetQuantilesFromDoublesSketchUDF.class); - sketchClasses.put("quantile_doubles", sd); + sketchClasses.add(sd); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java index b94e3fdbfa..7220f33c8c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java @@ -739,21 +739,20 @@ public void process(Object row, int tag) throws HiveException { // if hash aggregation is not behaving properly, disable it if (numRowsInput == numRowsCompareHashAggr) { numRowsCompareHashAggr += groupbyMapAggrInterval; - long numRowsProcessed = groupingSetsPresent ? numRowsInput * groupingSets.size() : numRowsInput; // map-side aggregation should reduce the entries by at-least half - if (numRowsHashTbl > numRowsProcessed * minReductionHashAggr) { + if (numRowsHashTbl > numRowsInput * minReductionHashAggr) { LOG.warn("Disable Hash Aggr: #hash table = " + numRowsHashTbl - + " #numRowsInput = " + numRowsInput + " reduction = " + 1.0 * (numRowsHashTbl / numRowsProcessed) - + " minReduction = " + minReductionHashAggr + " groupingSetsPresent = " + groupingSetsPresent - + " numRowsProcessed = " + numRowsProcessed); + + " #total = " + numRowsInput + " reduction = " + 1.0 + * (numRowsHashTbl / numRowsInput) + " minReduction = " + + minReductionHashAggr); flushHashTable(true); hashAggr = false; } else { if (LOG.isTraceEnabled()) { - LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl + " #numRowsInput = " + numRowsInput - + " reduction = " + 1.0 * (numRowsHashTbl / numRowsProcessed) + " minReduction = " - + minReductionHashAggr + " groupingSetsPresent = " + groupingSetsPresent + " numRowsProcessed = " - + numRowsProcessed); + LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl + + " #total = " + numRowsInput + " reduction = " + 1.0 + * (numRowsHashTbl / numRowsInput) 
+ " minReduction = " + + minReductionHashAggr); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java index 6ceea2f048..40e9e97890 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java @@ -78,7 +78,7 @@ /** * The mapping from expression function names to expression classes. */ - private final Map mFunctions = new ConcurrentHashMap(); + private final Map mFunctions = new LinkedHashMap(); private final Set> builtIns = Collections.synchronizedSet(new HashSet>()); /** * Persistent map contains refcounts that are only modified in synchronized methods for now, @@ -91,7 +91,6 @@ /** * The epic lock for the registry. This was added to replace the synchronized methods with * minimum disruption; the locking should really be made more granular here. - * This lock is protecting mFunctions, builtIns and persistent maps. */ private final ReentrantLock lock = new ReentrantLock(); @@ -332,9 +331,11 @@ private void validateClass(Class input, Class expected) { * @return */ public FunctionInfo getFunctionInfo(String functionName) throws SemanticException { + lock.lock(); + try { functionName = functionName.toLowerCase(); if (FunctionUtils.isQualifiedFunctionName(functionName)) { - FunctionInfo functionInfo = getQualifiedFunctionInfo(functionName); + FunctionInfo functionInfo = getQualifiedFunctionInfoUnderLock(functionName); addToCurrentFunctions(functionName, functionInfo); return functionInfo; } @@ -347,10 +348,14 @@ public FunctionInfo getFunctionInfo(String functionName) throws SemanticExceptio if (functionInfo == null) { functionName = FunctionUtils.qualifyFunctionName( functionName, SessionState.get().getCurrentDatabase().toLowerCase()); - functionInfo = getQualifiedFunctionInfo(functionName); + functionInfo = getQualifiedFunctionInfoUnderLock(functionName); } addToCurrentFunctions(functionName, functionInfo); return 
functionInfo; + } finally { + lock.unlock(); + } + } private void addToCurrentFunctions(String functionName, FunctionInfo functionInfo) { @@ -628,7 +633,7 @@ public GenericUDAFResolver getGenericUDAFResolver(String functionName) throws Se return null; } - private FunctionInfo getQualifiedFunctionInfo(String qualifiedName) throws SemanticException { + private FunctionInfo getQualifiedFunctionInfoUnderLock(String qualifiedName) throws SemanticException { FunctionInfo info = mFunctions.get(qualifiedName); if (info != null && info.isBlockedFunction()) { throw new SemanticException ("UDF " + qualifiedName + " is not allowed"); @@ -653,7 +658,15 @@ private FunctionInfo getQualifiedFunctionInfo(String qualifiedName) throws Seman if (conf == null || !HiveConf.getBoolVar(conf, ConfVars.HIVE_ALLOW_UDF_LOAD_ON_DEMAND)) { return null; } - return getFunctionInfoFromMetastoreNoLock(qualifiedName, conf); + // This is a little bit weird. We'll do the MS call outside of the lock. Our caller calls us + // under lock, so we'd preserve the lock state for them; their finally block will release the + // lock correctly. See the comment on the lock field - the locking needs to be reworked. + lock.unlock(); + try { + return getFunctionInfoFromMetastoreNoLock(qualifiedName, conf); + } finally { + lock.lock(); + } } // should be called after session registry is checked diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/LoadDatabase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/LoadDatabase.java index 85e9addcd3..52777f3b20 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/LoadDatabase.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/LoadDatabase.java @@ -117,7 +117,7 @@ private boolean isDbEmpty(String dbName) throws HiveException { private Task createDbTask(Database dbObj) { // note that we do not set location - for repl load, we want that auto-created. 
- CreateDatabaseDesc createDbDesc = new CreateDatabaseDesc(dbObj.getName(), dbObj.getDescription(), null, null, false, + CreateDatabaseDesc createDbDesc = new CreateDatabaseDesc(dbObj.getName(), dbObj.getDescription(), null, false, updateDbProps(dbObj, context.dumpDirectory)); // If it exists, we want this to be an error condition. Repl Load is not intended to replace a // db. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java index 84ae54157e..a0da0ad2fe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java @@ -19,7 +19,6 @@ */ package org.apache.hadoop.hive.ql.exec.tez; -import java.lang.reflect.Field; import java.util.Iterator; import java.util.Map; import java.util.function.Predicate; @@ -34,7 +33,6 @@ public class TezConfigurationFactory { private static TezConfiguration defaultConf = new TezConfiguration(); - private static final Field updatingResource; private static final Logger LOG = LoggerFactory.getLogger(TezConfigurationFactory.class.getName()); @@ -43,14 +41,6 @@ String sslConf = defaultConf.get(SSL_CLIENT_CONF_KEY, "ssl-client.xml"); defaultConf.addResource(sslConf); LOG.info("SSL conf : " + sslConf); - try { - //Cache the field handle so that we can avoid expensive conf.getPropertySources(key) later - updatingResource = Configuration.class.getDeclaredField("updatingResource"); - } catch (NoSuchFieldException | SecurityException e) { - throw new RuntimeException(e); - } - updatingResource.setAccessible(true); - } public static Configuration copyInto(Configuration target, Configuration src, @@ -60,12 +50,7 @@ public static Configuration copyInto(Configuration target, Configuration src, Map.Entry entry = iter.next(); String name = entry.getKey(); String value = entry.getValue(); - String[] sources; - try { - sources = 
((Map)updatingResource.get(src)).get(name); - } catch (IllegalArgumentException | IllegalAccessException e) { - throw new RuntimeException(e); - } + String[] sources = src.getPropertySources(name); final String source; if (sources == null || sources.length == 0) { source = null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java index 528b8f5a7c..79b073f93a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java @@ -79,20 +79,18 @@ public void process(Object row, int tag) throws HiveException { } //skip skipSize rows of batch batch.size = Math.min(batch.size, offset + limit - currCount); - int newBatchSize = batch.size - skipSize; if (batch.selectedInUse == false) { batch.selectedInUse = true; - for (int i = 0; i < newBatchSize; i++) { + for (int i = 0; i < batch.size - skipSize; i++) { batch.selected[i] = skipSize + i; } } else { - for (int i = 0; i < newBatchSize; i++) { + for (int i = 0; i < batch.size - skipSize; i++) { batch.selected[i] = batch.selected[skipSize + i]; } } - currCount += batch.size; - batch.size = newBatchSize; vectorForward(batch); + currCount += batch.size; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java index 8e436bcd63..6082321a90 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java @@ -14,8 +14,6 @@ package org.apache.hadoop.hive.ql.io.parquet.convert; import java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Map; import java.util.Optional; @@ -45,7 +43,6 @@ import org.apache.hadoop.io.Text; import 
org.apache.hadoop.io.Writable; -import org.apache.parquet.Preconditions; import org.apache.parquet.column.Dictionary; import org.apache.parquet.io.api.Binary; import org.apache.parquet.io.api.PrimitiveConverter; @@ -665,25 +662,9 @@ protected HiveDecimalWritable convert(Binary binary) { }; } }, - EINT96_TIMESTAMP_CONVERTER(TimestampWritableV2.class) { + ETIMESTAMP_CONVERTER(TimestampWritableV2.class) { @Override PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) { - if (hiveTypeInfo != null) { - String typeName = TypeInfoUtils.getBaseName(hiveTypeInfo.getTypeName()); - switch (typeName) { - case serdeConstants.BIGINT_TYPE_NAME: - return new BinaryConverter(type, parent, index) { - @Override - protected LongWritable convert(Binary binary) { - Preconditions.checkArgument(binary.length() == 12, "Must be 12 bytes"); - ByteBuffer buf = binary.toByteBuffer(); - buf.order(ByteOrder.LITTLE_ENDIAN); - long longVal = buf.getLong(); - return new LongWritable(longVal); - } - }; - } - } return new BinaryConverter(type, parent, index) { @Override protected TimestampWritableV2 convert(Binary binary) { @@ -709,22 +690,6 @@ protected TimestampWritableV2 convert(Binary binary) { @Override PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) { - if (hiveTypeInfo != null) { - String typeName = TypeInfoUtils.getBaseName(hiveTypeInfo.getTypeName()); - switch (typeName) { - case serdeConstants.BIGINT_TYPE_NAME: - return new BinaryConverter(type, parent, index) { - @Override - protected LongWritable convert(Binary binary) { - Preconditions.checkArgument(binary.length() == 8, "Must be 8 bytes"); - ByteBuffer buf = binary.toByteBuffer(); - buf.order(ByteOrder.LITTLE_ENDIAN); - long longVal = buf.getLong(); - return new LongWritable(longVal); - } - }; - } - } return new PrimitiveConverter() { @Override public void addLong(final long value) { 
@@ -770,7 +735,8 @@ private ETypeConverter(final Class type) { public static PrimitiveConverter getNewConverter(final PrimitiveType type, final int index, final ConverterParent parent, final TypeInfo hiveTypeInfo) { if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) { - return EINT96_TIMESTAMP_CONVERTER.getConverter(type, index, parent, hiveTypeInfo); + //TODO- cleanup once parquet support Timestamp type annotation. + return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent, hiveTypeInfo); } if (type.getLogicalTypeAnnotation() != null) { Optional converter = type.getLogicalTypeAnnotation() diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java index 84396e1885..fb5a306ac4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java @@ -140,9 +140,9 @@ else if(l.txnId == 0) { } locks.add(hl); if (res.getState() != LockState.ACQUIRED) { - LOG.error("Unable to acquire locks for lockId={} after {} retries (retries took {} ms). QueryId={}\n{}", - res.getLockid(), numRetries, retryDuration, queryId, res); - if (res.getState() == LockState.WAITING) { + if(res.getState() == LockState.WAITING) { + LOG.error("Unable to acquire locks for lockId={} after {} retries (retries took {} ms). QueryId={}", + res.getLockid(), numRetries, retryDuration, queryId); /** * the {@link #unlock(HiveLock)} here is more about future proofing when support for * multi-statement txns is added. 
In that case it's reasonable for the client diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java index deaab89c1f..b4dac4346e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java @@ -99,8 +99,8 @@ Licensed to the Apache Software Foundation (ASF) under one private volatile DbLockManager lockMgr = null; /** - * The Metastore TXNS sequence is initialized to 1. - * Thus is 1 is first transaction id. + * The Metastore NEXT_TXN_ID.NTXN_NEXT is initialized to 1; it contains the next available + * transaction id. Thus is 1 is first transaction id. */ private volatile long txnId = 0; @@ -412,8 +412,6 @@ LockState acquireLocks(QueryPlan plan, Context ctx, String username, boolean isB rqstBuilder.setTransactionId(txnId) .setUser(username); - rqstBuilder.setZeroWaitReadEnabled(!conf.getBoolVar(HiveConf.ConfVars.TXN_OVERWRITE_X_LOCK) || - !conf.getBoolVar(HiveConf.ConfVars.TXN_WRITE_X_LOCK)); // Make sure we need locks. It's possible there's nothing to lock in // this operation. @@ -422,10 +420,11 @@ LockState acquireLocks(QueryPlan plan, Context ctx, String username, boolean isB return null; } List lockComponents = AcidUtils.makeLockComponents(plan.getOutputs(), plan.getInputs(), conf); + lockComponents.addAll(getGlobalLocks(ctx.getConf())); //It's possible there's nothing to lock even if we have w/r entities. 
- if (lockComponents.isEmpty()) { + if(lockComponents.isEmpty()) { LOG.debug("No locks needed for queryId=" + queryId); return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index bd1fae1143..8d194c384b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -89,7 +89,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.HiveStatsUtils; @@ -2879,12 +2878,7 @@ private void constructOneLBLocationMap(FileStatus fSta, // calculate full path spec for each valid partition path validPartitions.forEach(partPath -> { Map fullPartSpec = Maps.newLinkedHashMap(partSpec); - String staticParts = Warehouse.makeDynamicPartName(partSpec); - Path computedPath = partPath; - if (!staticParts.isEmpty() ) { - computedPath = new Path(new Path(partPath.getParent(), staticParts), partPath.getName()); - } - if (!Warehouse.makeSpecFromName(fullPartSpec, computedPath, new HashSet<>(partSpec.keySet()))) { + if (!Warehouse.makeSpecFromName(fullPartSpec, partPath, new HashSet<>(partSpec.keySet()))) { Utilities.FILE_OP_LOGGER.warn("Ignoring invalid DP directory " + partPath); } else { PartitionDetails details = new PartitionDetails(); @@ -4127,7 +4121,7 @@ private static void copyFiles(final HiveConf conf, final FileSystem destFs, Arrays.sort(files); for (final FileStatus srcFile : files) { final Path srcP = srcFile.getPath(); - final boolean needToCopy = needToCopy(conf, srcP, destf, srcFs, destFs, configuredOwner, isManaged); + final boolean needToCopy = needToCopy(srcP, destf, srcFs, destFs, configuredOwner, isManaged); final boolean isRenameAllowed = !needToCopy && 
!isSrcLocal; @@ -4442,7 +4436,7 @@ public static boolean moveFile(final HiveConf conf, Path srcf, final Path destf, destFs.copyFromLocalFile(srcf, destf); return true; } else { - if (needToCopy(conf, srcf, destf, srcFs, destFs, configuredOwner, isManaged)) { + if (needToCopy(srcf, destf, srcFs, destFs, configuredOwner, isManaged)) { //copy if across file system or encryption zones. LOG.debug("Copying source " + srcf + " to " + destf + " because HDFS encryption zones are different."); return FileUtils.copy(srcf.getFileSystem(conf), srcf, destf.getFileSystem(conf), destf, @@ -4567,7 +4561,7 @@ static private HiveException getHiveException(Exception e, String msg, String lo * TODO- consider if need to do this for different file authority. * @throws HiveException */ - static private boolean needToCopy(final HiveConf conf, Path srcf, Path destf, FileSystem srcFs, + static private boolean needToCopy(Path srcf, Path destf, FileSystem srcFs, FileSystem destFs, String configuredOwner, boolean isManaged) throws HiveException { //Check if different FileSystems if (!FileUtils.equalsFileSystem(srcFs, destFs)) { @@ -4608,10 +4602,6 @@ static private boolean needToCopy(final HiveConf conf, Path srcf, Path destf, Fi } } - // if Encryption not enabled, no copy needed - if (!DFSUtilClient.isHDFSEncryptionEnabled(conf)) { - return false; - } //Check if different encryption zones HadoopShims.HdfsEncryptionShim srcHdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim(srcFs); HadoopShims.HdfsEncryptionShim destHdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim(destFs); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java index d64ae448cd..470057301b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java @@ 
-149,6 +149,7 @@ public void showTablesExtended(DataOutputStream out, List tables) // In case the query is served by HiveServer2, don't pad it with spaces, // as HiveServer2 output is consumed by JDBC/ODBC clients. out.write(mdt.renderTable(!SessionState.get().isHiveServerQuery()).getBytes("UTF-8")); + out.write(terminator); } catch (IOException e) { throw new HiveException(e); } @@ -197,6 +198,7 @@ public void showMaterializedViews(DataOutputStream out, List
materialized // In case the query is served by HiveServer2, don't pad it with spaces, // as HiveServer2 output is consumed by JDBC/ODBC clients. out.write(mdt.renderTable(!SessionState.get().isHiveServerQuery()).getBytes("UTF-8")); + out.write(terminator); } catch (IOException e) { throw new HiveException(e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRewriteCountDistinctToDataSketches.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRewriteCountDistinctToDataSketches.java deleted file mode 100644 index c23e2c4938..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRewriteCountDistinctToDataSketches.java +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.optimizer.calcite.rules; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.rel.RelCollation; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Aggregate; -import org.apache.calcite.rel.core.AggregateCall; -import org.apache.calcite.rel.core.RelFactories.ProjectFactory; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.sql.SqlAggFunction; -import org.apache.calcite.sql.SqlOperator; -import org.apache.hadoop.hive.ql.exec.DataSketchesFunctions; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; -import org.apache.hive.plugin.api.HiveUDFPlugin.UDFDescriptor; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.ImmutableList; - -/** - * This rule could rewrite {@code count(distinct(x))} calls to be calculated using sketch based functions. - * - * The transformation here works on Aggregate nodes; the operations done are the following: - * - * 1. Identify candidate {@code count(distinct)} aggregate calls - * 2. A new Aggregate is created in which the aggregation is done by the sketch function - * 3. 
A new Project is inserted on top of the Aggregate; which unwraps the resulting - * count-distinct estimation from the sketch representation - */ -public final class HiveRewriteCountDistinctToDataSketches extends RelOptRule { - - protected static final Logger LOG = LoggerFactory.getLogger(HiveRewriteCountDistinctToDataSketches.class); - private final String sketchClass; - private final ProjectFactory projectFactory; - - public HiveRewriteCountDistinctToDataSketches(String sketchClass) { - super(operand(HiveAggregate.class, any())); - this.sketchClass = sketchClass; - projectFactory = HiveRelFactories.HIVE_PROJECT_FACTORY; - } - - @Override - public void onMatch(RelOptRuleCall call) { - final Aggregate aggregate = call.rel(0); - - if (aggregate.getGroupSets().size() != 1) { - // not yet supported - return; - } - - List newAggCalls = new ArrayList(); - - VBuilder vb = new VBuilder(aggregate); - - if (aggregate.getAggCallList().equals(vb.newAggCalls)) { - // rule didn't made any changes - return; - } - - newAggCalls = vb.newAggCalls; - RelNode newAgg = aggregate.copy(aggregate.getTraitSet(), aggregate.getInput(), aggregate.getGroupSet(), - aggregate.getGroupSets(), newAggCalls); - - RelNode newProject = projectFactory.createProject(newAgg, vb.newProjects, aggregate.getRowType().getFieldNames()); - - call.transformTo(newProject); - return; - } - - /** - * Helper class to help in building a new Aggregate and Project. 
- */ - // NOTE: methods in this class are not re-entrant; drop-to-frame to constructor during debugging - class VBuilder { - - private Aggregate aggregate; - private List newAggCalls; - private List newProjects; - private final RexBuilder rexBuilder; - - public VBuilder(Aggregate aggregate) { - this.aggregate = aggregate; - newAggCalls = new ArrayList(); - newProjects = new ArrayList(); - rexBuilder = aggregate.getCluster().getRexBuilder(); - - // add non-aggregated fields - as identity projections - addGroupFields(); - - for (AggregateCall aggCall : aggregate.getAggCallList()) { - processAggCall(aggCall); - } - } - - private void addGroupFields() { - for (int i = 0; i < aggregate.getGroupCount(); i++) { - newProjects.add(rexBuilder.makeInputRef(aggregate, 0)); - } - } - - private void processAggCall(AggregateCall aggCall) { - if (isSimpleCountDistinct(aggCall)) { - rewriteCountDistinct(aggCall); - return; - } - appendAggCall(aggCall, null); - } - - private void appendAggCall(AggregateCall aggCall, SqlOperator projectOperator) { - RelDataType origType = aggregate.getRowType().getFieldList().get(newProjects.size()).getType(); - RexNode projRex = rexBuilder.makeInputRef(aggCall.getType(), newProjects.size()); - if (projectOperator != null) { - projRex = rexBuilder.makeCall(projectOperator, ImmutableList.of(projRex)); - projRex = rexBuilder.makeCast(origType, projRex); - } - newAggCalls.add(aggCall); - newProjects.add(projRex); - } - - private boolean isSimpleCountDistinct(AggregateCall aggCall) { - return aggCall.isDistinct() && aggCall.getArgList().size() == 1 - && aggCall.getAggregation().getName().equalsIgnoreCase("count") && !aggCall.hasFilter(); - } - - private void rewriteCountDistinct(AggregateCall aggCall) { - SqlAggFunction aggFunction = (SqlAggFunction) getSqlOperator(DataSketchesFunctions.DATA_TO_SKETCH); - boolean distinct = false; - boolean approximate = true; - boolean ignoreNulls = aggCall.ignoreNulls(); - List argList = aggCall.getArgList(); - int 
filterArg = aggCall.filterArg; - RelCollation collation = aggCall.getCollation(); - int groupCount = aggregate.getGroupCount(); - RelNode input = aggregate.getInput(); - RelDataType type = rexBuilder.deriveReturnType(aggFunction, Collections.emptyList()); - String name = aggFunction.getName(); - - AggregateCall ret = AggregateCall.create(aggFunction, distinct, approximate, ignoreNulls, argList, filterArg, - collation, groupCount, input, type, name); - - appendAggCall(ret, getSqlOperator(DataSketchesFunctions.SKETCH_TO_ESTIMATE)); - } - - private SqlOperator getSqlOperator(String fnName) { - UDFDescriptor fn = DataSketchesFunctions.INSTANCE.getSketchFunction(sketchClass, fnName); - if (!fn.getCalciteFunction().isPresent()) { - throw new RuntimeException(fn.toString() + " doesn't have a Calcite function associated with it"); - } - return fn.getCalciteFunction().get(); - } - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 7b34e91139..f94a9f9d70 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -236,7 +236,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRemoveGBYSemiJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRemoveSqCountCheck; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRewriteCountDistinctToDataSketches; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSemiJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule; @@ -1969,14 +1968,6 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv HiveExceptRewriteRule.INSTANCE); //1. 
Distinct aggregate rewrite - if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_ENABLED)) { - // Rewrite to datasketches if enabled - if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_COUNTDISTINCT_ENABLED)) { - String sketchClass = conf.getVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_COUNT_DISTINCT_SKETCH); - generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, - new HiveRewriteCountDistinctToDataSketches(sketchClass)); - } - } // Run this optimization early, since it is expanding the operator pipeline. if (!conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") && conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEDISTINCTREWRITE)) { @@ -2287,7 +2278,7 @@ private RelNode copyNodeScan(RelNode scan) { RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT); RelMetadataQuery mq = RelMetadataQuery.instance(); RelOptCost costOriginalPlan = mq.getCumulativeCost(calcitePreMVRewritingPlan); - final double factorSelectivity = HiveConf.getFloatVar( + final double factorSelectivity = (double) HiveConf.getFloatVar( conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REBUILD_INCREMENTAL_FACTOR); RelOptCost costRebuildPlan = mq.getCumulativeCost(basePlan).multiplyBy(factorSelectivity); if (costOriginalPlan.isLe(costRebuildPlan)) { @@ -3273,13 +3264,7 @@ private RelNode genFilterRelNode(ASTNode filterNode, RelNode srcRel, ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean useCaching) throws SemanticException { RexNode filterExpression = genRexNode(filterNode, relToHiveRR.get(srcRel), - outerRR, null, useCaching, cluster.getRexBuilder()); - - return genFilterRelNode(filterExpression, srcRel, outerNameToPosMap, outerRR); - } - - private RelNode genFilterRelNode(RexNode filterExpression, RelNode srcRel, - ImmutableMap outerNameToPosMap, RowResolver outerRR) throws SemanticException { + outerRR, null, useCaching, cluster.getRexBuilder()); if (RexUtil.isLiteral(filterExpression, false) && filterExpression.getType().getSqlTypeName() != 
SqlTypeName.BOOLEAN) { // queries like select * from t1 where 'foo'; @@ -3599,35 +3584,6 @@ private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, ImmutableMap genConstraintFilterLogicalPlan( - QB qb, RelNode srcRel, ImmutableMap outerNameToPosMap, RowResolver outerRR) - throws SemanticException { - if (qb.getIsQuery()) { - return null; - } - - String dest = qb.getParseInfo().getClauseNames().iterator().next(); - if (!updating(dest)) { - return null; - } - - RowResolver inputRR = relToHiveRR.get(srcRel); - RexNode constraintUDF = RexNodeTypeCheck.genConstraintsExpr( - conf, cluster.getRexBuilder(), getTargetTable(qb, dest), updating(dest), inputRR); - if (constraintUDF == null) { - return null; - } - - RelNode constraintRel = genFilterRelNode(constraintUDF, srcRel, outerNameToPosMap, outerRR); - return new Pair<>(constraintRel, inputRR); - } - private AggregateCall convertGBAgg(AggregateInfo agg, List gbChildProjLst, HashMap rexNodeToPosMap, Integer childProjLstIndx) throws SemanticException { // 1. Get agg fn ret type in Calcite @@ -5171,13 +5127,6 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB, selectRel = selPair.getKey(); srcRel = (selectRel == null) ? srcRel : selectRel; - // Build Rel for Constraint checks - Pair constraintPair = - genConstraintFilterLogicalPlan(qb, srcRel, outerNameToPosMap, outerRR); - if (constraintPair != null) { - selPair = constraintPair; - } - // 6. Build Rel for OB Clause obRel = genOBLogicalPlan(qb, selPair, outerMostQB); srcRel = (obRel == null) ? 
srcRel : obRel; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 2da3786d7c..0de3730351 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -58,6 +58,8 @@ import org.antlr.runtime.tree.Tree; import org.antlr.runtime.tree.TreeVisitor; import org.antlr.runtime.tree.TreeVisitorAction; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.util.ImmutableBitSet; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; @@ -160,12 +162,14 @@ import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; import org.apache.hadoop.hive.ql.lockmgr.LockException; +import org.apache.hadoop.hive.ql.metadata.CheckConstraint; import org.apache.hadoop.hive.ql.metadata.DefaultConstraint; import org.apache.hadoop.hive.ql.metadata.DummyPartition; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.metadata.InvalidTableException; +import org.apache.hadoop.hive.ql.metadata.NotNullConstraint; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient; import org.apache.hadoop.hive.ql.metadata.Table; @@ -7109,6 +7113,109 @@ private void setStatsForNonNativeTable(String dbName, String tableName) throws S this.rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc))); } + + private void replaceColumnReference(ASTNode checkExpr, Map col2Col, + RowResolver inputRR){ + if(checkExpr.getType() == HiveParser.TOK_TABLE_OR_COL) { + ASTNode oldColChild = (ASTNode)(checkExpr.getChild(0)); + String oldColRef = 
oldColChild.getText().toLowerCase(); + assert(col2Col.containsKey(oldColRef)); + String internalColRef = col2Col.get(oldColRef); + String fullQualColRef[] = inputRR.reverseLookup(internalColRef); + String newColRef = fullQualColRef[1]; + checkExpr.deleteChild(0); + checkExpr.addChild(ASTBuilder.createAST(oldColChild.getType(), newColRef)); + } + else { + for(int i=0; i< checkExpr.getChildCount(); i++) { + replaceColumnReference((ASTNode)(checkExpr.getChild(i)), col2Col, inputRR); + } + } + } + + private ExprNodeDesc getCheckConstraintExpr(Table tbl, Operator input, RowResolver inputRR, String dest) + throws SemanticException{ + + CheckConstraint cc = null; + try { + cc = Hive.get().getEnabledCheckConstraints(tbl.getDbName(), tbl.getTableName()); + } + catch(HiveException e) { + throw new SemanticException(e); + } + if(cc == null || cc.getCheckConstraints().isEmpty()) { + return null; + } + + // build a map which tracks the name of column in input's signature to corresponding table column name + // this will be used to replace column references in CHECK expression AST with corresponding column name + // in input + Map col2Cols = new HashMap<>(); + List colInfos = input.getSchema().getSignature(); + int colIdx = 0; + if(updating(dest)) { + // if this is an update we need to skip the first col since it is row id + colIdx = 1; + } + for(FieldSchema fs: tbl.getCols()) { + // since SQL is case insenstive just to make sure that the comparison b/w column names + // and check expression's column reference work convert the key to lower case + col2Cols.put(fs.getName().toLowerCase(), colInfos.get(colIdx).getInternalName()); + colIdx++; + } + + List checkExprStrs = cc.getCheckExpressionList(); + TypeCheckCtx typeCheckCtx = new TypeCheckCtx(inputRR); + ExprNodeDesc checkAndExprs = null; + for(String checkExprStr:checkExprStrs) { + try { + ParseDriver parseDriver = new ParseDriver(); + ASTNode checkExprAST = parseDriver.parseExpression(checkExprStr); + //replace column 
references in checkExprAST with corresponding columns in input + replaceColumnReference(checkExprAST, col2Cols, inputRR); + Map genExprs = ExprNodeTypeCheck + .genExprNode(checkExprAST, typeCheckCtx); + ExprNodeDesc checkExpr = genExprs.get(checkExprAST); + // Check constraint fails only if it evaluates to false, NULL/UNKNOWN should evaluate to TRUE + ExprNodeDesc notFalseCheckExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor(). + getFuncExprNodeDesc("isnotfalse", checkExpr); + if(checkAndExprs == null) { + checkAndExprs = notFalseCheckExpr; + } + else { + checkAndExprs = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor(). + getFuncExprNodeDesc("and", checkAndExprs, notFalseCheckExpr); + } + } catch(Exception e) { + throw new SemanticException(e); + } + } + return checkAndExprs; + } + + private ImmutableBitSet getEnabledNotNullConstraints(Table tbl) throws HiveException{ + final NotNullConstraint nnc = Hive.get().getEnabledNotNullConstraints( + tbl.getDbName(), tbl.getTableName()); + ImmutableBitSet bitSet = null; + if(nnc == null || nnc.getNotNullConstraints().isEmpty()) { + return bitSet; + } + // Build the bitset with not null columns + ImmutableBitSet.Builder builder = ImmutableBitSet.builder(); + for (String nnCol : nnc.getNotNullConstraints().values()) { + int nnPos = -1; + for (int i = 0; i < tbl.getCols().size(); i++) { + if (tbl.getCols().get(i).getName().equals(nnCol)) { + nnPos = i; + builder.set(nnPos); + break; + } + } + } + bitSet = builder.build(); + return bitSet; + } + private boolean mergeCardinalityViolationBranch(final Operator input) { if(input instanceof SelectOperator) { SelectOperator selectOp = (SelectOperator)input; @@ -7124,43 +7231,104 @@ private boolean mergeCardinalityViolationBranch(final Operator input) { } private Operator genConstraintsPlan(String dest, QB qb, Operator input) throws SemanticException { - if (deleting(dest)) { + if(deleting(dest)) { // for DELETE statements NOT NULL constraint need not be checked return 
input; } //MERGE statements could have inserted a cardinality violation branch, we need to avoid that - if (mergeCardinalityViolationBranch(input)) { + if(mergeCardinalityViolationBranch(input)){ return input; } // if this is an insert into statement we might need to add constraint check - assert (input.getParentOperators().size() == 1); + Table targetTable = null; + Integer dest_type = qb.getMetaData().getDestTypeForAlias(dest); + if(dest_type == QBMetaData.DEST_TABLE) { + targetTable= qb.getMetaData().getDestTableForAlias(dest); + + } + else if(dest_type == QBMetaData.DEST_PARTITION){ + Partition dest_part = qb.getMetaData().getDestPartitionForAlias(dest); + targetTable = dest_part.getTable(); + + } + else { + throw new SemanticException("Generating constraint check plan: Invalid target type: " + dest); + } + RowResolver inputRR = opParseCtx.get(input).getRowResolver(); - Table targetTable = getTargetTable(qb, dest); - ExprNodeDesc combinedConstraintExpr = - ExprNodeTypeCheck.genConstraintsExpr(conf, targetTable, updating(dest), inputRR); + ExprNodeDesc nullConstraintExpr = getNotNullConstraintExpr(targetTable, input, dest); + ExprNodeDesc checkConstraintExpr = getCheckConstraintExpr(targetTable, input, inputRR, dest); + + ExprNodeDesc combinedConstraintExpr = null; + if(nullConstraintExpr != null && checkConstraintExpr != null) { + assert (input.getParentOperators().size() == 1); + combinedConstraintExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() + .getFuncExprNodeDesc("and", nullConstraintExpr, checkConstraintExpr); + + } + else if(nullConstraintExpr != null) { + combinedConstraintExpr = nullConstraintExpr; + } + else if(checkConstraintExpr != null) { + combinedConstraintExpr = checkConstraintExpr; + } if (combinedConstraintExpr != null) { + ExprNodeDesc constraintUDF = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() + .getFuncExprNodeDesc("enforce_constraint", combinedConstraintExpr); return putOpInsertMap(OperatorFactory.getAndMakeChild( - new 
FilterDesc(combinedConstraintExpr, false), new RowSchema( + new FilterDesc(constraintUDF, false), new RowSchema( inputRR.getColumnInfos()), input), inputRR); } return input; } - protected Table getTargetTable(QB qb, String dest) throws SemanticException { - Integer dest_type = qb.getMetaData().getDestTypeForAlias(dest); - if (dest_type == QBMetaData.DEST_TABLE) { - return qb.getMetaData().getDestTableForAlias(dest); + private ExprNodeDesc getNotNullConstraintExpr(Table targetTable, Operator input, String dest) throws SemanticException { + boolean forceNotNullConstraint = conf.getBoolVar(ConfVars.HIVE_ENFORCE_NOT_NULL_CONSTRAINT); + if(!forceNotNullConstraint) { + return null; + } - } else if (dest_type == QBMetaData.DEST_PARTITION) { - Partition dest_part = qb.getMetaData().getDestPartitionForAlias(dest); - return dest_part.getTable(); + ImmutableBitSet nullConstraintBitSet = null; + try { + nullConstraintBitSet = getEnabledNotNullConstraints(targetTable); + } catch (Exception e) { + if (e instanceof SemanticException) { + throw (SemanticException) e; + } else { + throw (new RuntimeException(e)); + } + } - } else { - throw new SemanticException("Generating constraint check plan: Invalid target type: " + dest); + if(nullConstraintBitSet == null) { + return null; + } + List colInfos = input.getSchema().getSignature(); + + ExprNodeDesc currUDF = null; + // Add NOT NULL constraints + int constraintIdx = 0; + for(int colExprIdx=0; colExprIdx < colInfos.size(); colExprIdx++) { + if(updating(dest) && colExprIdx == 0) { + // for updates first column is _rowid + continue; + } + if (nullConstraintBitSet.indexOf(constraintIdx) != -1) { + ExprNodeDesc currExpr = ExprNodeTypeCheck.toExprNode(colInfos.get(colExprIdx), null); + ExprNodeDesc isNotNullUDF = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() + .getFuncExprNodeDesc("isnotnull", currExpr); + if (currUDF != null) { + currUDF = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() + .getFuncExprNodeDesc("and", currUDF, 
isNotNullUDF); + } else { + currUDF = isNotNullUDF; + } + } + constraintIdx++; } + return currUDF; } private Path getDestinationFilePath(final String destinationFile, boolean isMmTable) { @@ -14886,7 +15054,7 @@ private boolean isAcidOutputFormat(Class of) { } } - protected boolean updating(String destination) { + private boolean updating(String destination) { return destination.startsWith(Context.DestClausePrefix.UPDATE.toString()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FunctionSerializer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FunctionSerializer.java index 420c9bfd4d..733bab522f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FunctionSerializer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FunctionSerializer.java @@ -58,7 +58,8 @@ public void writeTo(JsonWriter writer, ReplicationSpec additionalPropertiesProvi FileSystem fileSystem = inputPath.getFileSystem(hiveConf); Path qualifiedUri = PathBuilder.fullyQualifiedHDFSUri(inputPath, fileSystem); String checkSum = ReplChangeManager.checksumFor(qualifiedUri, fileSystem); - String newFileUri = ReplChangeManager.encodeFileUri(qualifiedUri.toString(), checkSum, null); + String newFileUri = ReplChangeManager.getInstance(hiveConf) + .encodeFileUri(qualifiedUri.toString(), checkSum, null); resourceUris.add(new ResourceUri(uri.getResourceType(), newFileUri)); } else { resourceUris.add(uri); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/CreateDatabaseHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/CreateDatabaseHandler.java index c2e652a1b1..42fa88c5fb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/CreateDatabaseHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/CreateDatabaseHandler.java @@ -58,7 +58,7 @@ context.dbName == null ? 
db.getName() : context.dbName; CreateDatabaseDesc createDatabaseDesc = - new CreateDatabaseDesc(destinationDBName, db.getDescription(), null, null, true, db.getParameters()); + new CreateDatabaseDesc(destinationDBName, db.getDescription(), null, true, db.getParameters()); Task createDBTask = TaskFactory.get( new DDLWork(new HashSet<>(), new HashSet<>(), createDatabaseDesc), context.hiveConf); if (!db.getParameters().isEmpty()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ConstraintExprGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ConstraintExprGenerator.java deleted file mode 100644 index f72bd3200e..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ConstraintExprGenerator.java +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.parse.type; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.calcite.util.ImmutableBitSet; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.metadata.CheckConstraint; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.NotNullConstraint; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.parse.ParseDriver; -import org.apache.hadoop.hive.ql.parse.RowResolver; -import org.apache.hadoop.hive.ql.parse.SemanticException; - -/** - * This class generates enforce_constraint function expressions to validate constraints. - * @param Type of result expression: - *
    - *
  • {@link org.apache.hadoop.hive.ql.plan.ExprNodeDesc}
  • - *
  • {@link org.apache.calcite.rex.RexNode}
  • - *
- */ -class ConstraintExprGenerator { - private final HiveConf conf; - private final TypeCheckProcFactory typeCheckProcFactory; - private final TypeCheckProcFactory.DefaultExprProcessor exprProcessor; - - ConstraintExprGenerator(HiveConf conf, TypeCheckProcFactory typeCheckProcFactory) { - this.conf = conf; - this.typeCheckProcFactory = typeCheckProcFactory; - this.exprProcessor = typeCheckProcFactory.getDefaultExprProcessor(); - } - - public T genConstraintsExpr( - Table targetTable, boolean updateStatement, RowResolver inputRR) - throws SemanticException { - List inputColumnInfoList = inputRR.getColumnInfos(); - T nullConstraintExpr = getNotNullConstraintExpr(targetTable, inputRR, updateStatement); - T checkConstraintExpr = getCheckConstraintExpr(targetTable, inputColumnInfoList, inputRR, updateStatement); - T combinedConstraintExpr = null; - if (nullConstraintExpr != null && checkConstraintExpr != null) { - combinedConstraintExpr = exprProcessor.getFuncExprNodeDesc("and", nullConstraintExpr, checkConstraintExpr); - } else if (nullConstraintExpr != null) { - combinedConstraintExpr = nullConstraintExpr; - } else if (checkConstraintExpr != null) { - combinedConstraintExpr = checkConstraintExpr; - } - - if (combinedConstraintExpr == null) { - return null; - } - return exprProcessor.getFuncExprNodeDesc("enforce_constraint", combinedConstraintExpr); - } - - private T getNotNullConstraintExpr( - Table targetTable, RowResolver inputRR, boolean isUpdateStatement) - throws SemanticException { - boolean forceNotNullConstraint = conf.getBoolVar(HiveConf.ConfVars.HIVE_ENFORCE_NOT_NULL_CONSTRAINT); - if (!forceNotNullConstraint) { - return null; - } - - ImmutableBitSet nullConstraintBitSet; - try { - nullConstraintBitSet = getEnabledNotNullConstraints(targetTable); - } catch (SemanticException e) { - throw e; - } catch (Exception e) { - throw (new RuntimeException(e)); - } - - if (nullConstraintBitSet == null) { - return null; - } - - T currUDF = null; - int constraintIdx = 
0; - List inputColInfos = inputRR.getColumnInfos(); - for (int colExprIdx = 0; colExprIdx < inputColInfos.size(); colExprIdx++) { - if (isUpdateStatement && colExprIdx == 0) { - // for updates first column is _rowid - continue; - } - if (nullConstraintBitSet.indexOf(constraintIdx) != -1) { - T currExpr = typeCheckProcFactory.exprFactory.createColumnRefExpr(inputColInfos.get(colExprIdx), inputRR, 0); - T isNotNullUDF = exprProcessor.getFuncExprNodeDesc("isnotnull", currExpr); - if (currUDF != null) { - currUDF = exprProcessor.getFuncExprNodeDesc("and", currUDF, isNotNullUDF); - } else { - currUDF = isNotNullUDF; - } - } - constraintIdx++; - } - return currUDF; - } - - private ImmutableBitSet getEnabledNotNullConstraints(Table tbl) throws HiveException { - final NotNullConstraint nnc = Hive.get().getEnabledNotNullConstraints( - tbl.getDbName(), tbl.getTableName()); - if (nnc == null || nnc.getNotNullConstraints().isEmpty()) { - return null; - } - // Build the bitset with not null columns - ImmutableBitSet.Builder builder = ImmutableBitSet.builder(); - for (String nnCol : nnc.getNotNullConstraints().values()) { - int nnPos = -1; - for (int i = 0; i < tbl.getCols().size(); i++) { - if (tbl.getCols().get(i).getName().equals(nnCol)) { - nnPos = i; - builder.set(nnPos); - break; - } - } - } - return builder.build(); - } - - private T getCheckConstraintExpr( - Table tbl, List inputColInfos, RowResolver inputRR, boolean isUpdateStatement) - throws SemanticException { - - CheckConstraint cc = null; - try { - cc = Hive.get().getEnabledCheckConstraints(tbl.getDbName(), tbl.getTableName()); - } catch (HiveException e) { - throw new SemanticException(e); - } - if (cc == null || cc.getCheckConstraints().isEmpty()) { - return null; - } - - // build a map which tracks the name of column in input's signature to corresponding table column name - // this will be used to replace column references in CHECK expression AST with corresponding column name - // in input - Map col2Cols = new 
HashMap<>(); - int colIdx = 0; - if (isUpdateStatement) { - // if this is an update we need to skip the first col since it is row id - colIdx = 1; - } - for (FieldSchema fs : tbl.getCols()) { - // since SQL is case insenstive just to make sure that the comparison b/w column names - // and check expression's column reference work convert the key to lower case - col2Cols.put(fs.getName().toLowerCase(), inputColInfos.get(colIdx).getInternalName()); - colIdx++; - } - - List checkExprStrs = cc.getCheckExpressionList(); - TypeCheckCtx typeCheckCtx = new TypeCheckCtx(inputRR); - T checkAndExprs = null; - for (String checkExprStr : checkExprStrs) { - try { - ParseDriver parseDriver = new ParseDriver(); - ASTNode checkExprAST = parseDriver.parseExpression(checkExprStr); - //replace column references in checkExprAST with corresponding columns in input - replaceColumnReference(checkExprAST, col2Cols, inputRR); - Map genExprs = typeCheckProcFactory.genExprNode(checkExprAST, typeCheckCtx); - T checkExpr = genExprs.get(checkExprAST); - // Check constraint fails only if it evaluates to false, NULL/UNKNOWN should evaluate to TRUE - T notFalseCheckExpr = exprProcessor.getFuncExprNodeDesc("isnotfalse", checkExpr); - if (checkAndExprs == null) { - checkAndExprs = notFalseCheckExpr; - } else { - checkAndExprs = exprProcessor.getFuncExprNodeDesc("and", checkAndExprs, notFalseCheckExpr); - } - } catch (Exception e) { - throw new SemanticException(e); - } - } - return checkAndExprs; - } - - private void replaceColumnReference(ASTNode checkExpr, Map col2Col, - RowResolver inputRR) { - if (checkExpr.getType() == HiveParser.TOK_TABLE_OR_COL) { - ASTNode oldColChild = (ASTNode) (checkExpr.getChild(0)); - String oldColRef = oldColChild.getText().toLowerCase(); - assert (col2Col.containsKey(oldColRef)); - String internalColRef = col2Col.get(oldColRef); - String[] fullQualColRef = inputRR.reverseLookup(internalColRef); - String newColRef = fullQualColRef[1]; - checkExpr.deleteChild(0); - 
checkExpr.addChild(ASTBuilder.createAST(oldColChild.getType(), newColRef)); - } else { - for (int i = 0; i < checkExpr.getChildCount(); i++) { - replaceColumnReference((ASTNode) (checkExpr.getChild(i)), col2Col, inputRR); - } - } - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeTypeCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeTypeCheck.java index 1ecdfdc0db..3e3d331412 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeTypeCheck.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeTypeCheck.java @@ -19,10 +19,7 @@ package org.apache.hadoop.hive.ql.parse.type; import java.util.Map; - -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -78,10 +75,4 @@ public static ExprNodeDesc toExprNode(ColumnInfo columnInfo, RowResolver rowReso return factory.toExpr(columnInfo, rowResolver, 0); } - public static ExprNodeDesc genConstraintsExpr( - HiveConf conf, Table targetTable, boolean updateStatement, RowResolver inputRR) - throws SemanticException { - return new ConstraintExprGenerator<>(conf, new TypeCheckProcFactory<>(new ExprNodeDescExprFactory())) - .genConstraintsExpr(targetTable, updateStatement, inputRR); - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeTypeCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeTypeCheck.java index 745c205827..c9131ec018 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeTypeCheck.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeTypeCheck.java @@ -20,9 +20,7 @@ import java.util.Map; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; -import org.apache.hadoop.hive.conf.HiveConf; import 
org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -69,10 +67,4 @@ public static RexNode toExprNode(ColumnInfo columnInfo, RowResolver rowResolver, return factory.toExpr(columnInfo, rowResolver, offset); } - public static RexNode genConstraintsExpr( - HiveConf conf, RexBuilder rexBuilder, Table targetTable, boolean updateStatement, RowResolver inputRR) - throws SemanticException { - return new ConstraintExprGenerator<>(conf, new TypeCheckProcFactory<>(new RexNodeExprFactory(rexBuilder))) - .genConstraintsExpr(targetTable, updateStatement, inputRR); - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index 55bd27e006..6c6138ef56 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -416,6 +416,7 @@ public SessionState(HiveConf conf, String userName) { resourceMaps = new ResourceMaps(); // Must be deterministic order map for consistent q-test output across Java versions overriddenConfigurations = new LinkedHashMap(); + overriddenConfigurations.putAll(HiveConf.getConfSystemProperties()); // if there isn't already a session name, go ahead and create it. 
if (StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HIVESESSIONID))) { conf.setVar(HiveConf.ConfVars.HIVESESSIONID, makeSessionId()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java index 23512e252e..37a5862791 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java @@ -151,7 +151,7 @@ public void run() { recoverFailedCompactions(true); // Clean anything from the txns table that has no components left in txn_components. - txnHandler.cleanEmptyAbortedAndCommittedTxns(); + txnHandler.cleanEmptyAbortedTxns(); // Clean TXN_TO_WRITE_ID table for entries under min_uncommitted_txn referred by any open txns. txnHandler.cleanTxnToWriteIdTable(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java index 7682e1f5c2..00a6c89b1e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java @@ -133,7 +133,6 @@ protected transient IntObjectInspector intOI; protected transient JobConf jc; private boolean orderByQuery; - private boolean limitQuery; private boolean forceSingleSplit; protected ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); protected DataOutput dos = new DataOutputStream(bos); @@ -319,7 +318,6 @@ private PlanFragment createPlanFragment(String query, ApplicationId splitsAppId) QueryPlan plan = driver.getPlan(); orderByQuery = plan.getQueryProperties().hasOrderBy() || plan.getQueryProperties().hasOuterOrderBy(); - limitQuery = plan.getQueryProperties().getOuterQueryLimit() != -1; forceSingleSplit = orderByQuery && HiveConf.getBoolVar(conf, ConfVars.LLAP_EXTERNAL_SPLITS_ORDER_BY_FORCE_SINGLE_SPLIT); List> roots = plan.getRootTasks(); @@ 
-336,10 +334,9 @@ private PlanFragment createPlanFragment(String query, ApplicationId splitsAppId) } else { tezWork = ((TezTask) roots.get(0)).getWork(); } - // A simple limit query (select * from table limit n) generates only mapper work (no reduce phase). - // This can create multiple splits ignoring limit constraint, and multiple llap daemons working on those splits - // return more than "n" rows. Therefore, a limit query needs to be materialized. - if (tezWork == null || tezWork.getAllWork().size() != 1 || limitQuery) { + + if (tezWork == null || tezWork.getAllWork().size() != 1) { + String tableName = "table_" + UUID.randomUUID().toString().replaceAll("-", ""); String storageFormatString = getTempTableStorageFormatString(conf); diff --git a/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestCompactionTxnHandler.java b/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestCompactionTxnHandler.java index 7069dae393..7c8903fbae 100644 --- a/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestCompactionTxnHandler.java +++ b/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestCompactionTxnHandler.java @@ -512,14 +512,9 @@ public void testMarkCleanedCleansTxnsAndTxnComponents() // Check that we are cleaning up the empty aborted transactions GetOpenTxnsResponse txnList = txnHandler.getOpenTxns(); assertEquals(3, txnList.getOpen_txnsSize()); - // Create one aborted for low water mark - txnid = openTxn(); - txnHandler.abortTxn(new AbortTxnRequest(txnid)); - txnHandler.setOpenTxnTimeOutMillis(1); - txnHandler.cleanEmptyAbortedAndCommittedTxns(); + txnHandler.cleanEmptyAbortedTxns(); txnList = txnHandler.getOpenTxns(); - assertEquals(3, txnList.getOpen_txnsSize()); - txnHandler.setOpenTxnTimeOutMillis(1000); + assertEquals(2, txnList.getOpen_txnsSize()); rqst = new CompactionRequest("mydb", "foo", CompactionType.MAJOR); rqst.setPartitionname("bar"); @@ -534,13 +529,9 @@ public void testMarkCleanedCleansTxnsAndTxnComponents() txnHandler.markCleaned(ci); 
txnHandler.openTxns(new OpenTxnRequest(1, "me", "localhost")); - // The open txn will became the low water mark - Thread.sleep(txnHandler.getOpenTxnTimeOutMillis()); - txnHandler.setOpenTxnTimeOutMillis(1); - txnHandler.cleanEmptyAbortedAndCommittedTxns(); + txnHandler.cleanEmptyAbortedTxns(); txnList = txnHandler.getOpenTxns(); assertEquals(3, txnList.getOpen_txnsSize()); - txnHandler.setOpenTxnTimeOutMillis(1000); } @Test diff --git a/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java b/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java index 868da0c7a0..3916e88a9d 100644 --- a/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java +++ b/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java @@ -193,40 +193,30 @@ public void testAbortTxn() throws Exception { boolean gotException = false; try { txnHandler.abortTxn(new AbortTxnRequest(2)); - } catch(NoSuchTxnException ex) { + } + catch(NoSuchTxnException ex) { gotException = true; - // this is the last committed, so it is still in the txns table - Assert.assertEquals("Transaction " + JavaUtils.txnIdToString(2) + " is already committed.", ex.getMessage()); + //if this wasn't an empty txn, we'd get a better msg + Assert.assertEquals("No such transaction " + JavaUtils.txnIdToString(2), ex.getMessage()); } Assert.assertTrue(gotException); gotException = false; txnHandler.commitTxn(new CommitTxnRequest(3)); try { txnHandler.abortTxn(new AbortTxnRequest(3)); - } catch(NoSuchTxnException ex) { + } + catch(NoSuchTxnException ex) { gotException = true; //txn 3 is not empty txn, so we get a better msg Assert.assertEquals("Transaction " + JavaUtils.txnIdToString(3) + " is already committed.", ex.getMessage()); } Assert.assertTrue(gotException); - txnHandler.setOpenTxnTimeOutMillis(1); - txnHandler.cleanEmptyAbortedAndCommittedTxns(); - txnHandler.setOpenTxnTimeOutMillis(1000); - gotException = false; - try { - txnHandler.abortTxn(new AbortTxnRequest(2)); - } 
catch(NoSuchTxnException ex) { - gotException = true; - // now the second transaction is cleared and since it was empty, we do not recognize it anymore - Assert.assertEquals("No such transaction " + JavaUtils.txnIdToString(2), ex.getMessage()); - } - Assert.assertTrue(gotException); - gotException = false; try { txnHandler.abortTxn(new AbortTxnRequest(4)); - } catch(NoSuchTxnException ex) { + } + catch(NoSuchTxnException ex) { gotException = true; Assert.assertEquals("No such transaction " + JavaUtils.txnIdToString(4), ex.getMessage()); } @@ -1682,11 +1672,9 @@ private void checkReplTxnForTest(Long startTxnId, Long endTxnId, String replPoli @Test public void testReplOpenTxn() throws Exception { int numTxn = 50000; - String[] output = TxnDbUtil.queryToString(conf, "SELECT MAX(\"TXN_ID\") + 1 FROM \"TXNS\"").split("\n"); + String[] output = TxnDbUtil.queryToString(conf, "SELECT \"NTXN_NEXT\" FROM \"NEXT_TXN_ID\"").split("\n"); long startTxnId = Long.parseLong(output[1].trim()); - txnHandler.setOpenTxnTimeOutMillis(30000); List txnList = replOpenTxnForTest(startTxnId, numTxn, "default.*"); - txnHandler.setOpenTxnTimeOutMillis(1000); assert(txnList.size() == numTxn); txnHandler.abortTxns(new AbortTxnsRequest(txnList)); } @@ -1694,7 +1682,7 @@ public void testReplOpenTxn() throws Exception { @Test public void testReplAllocWriteId() throws Exception { int numTxn = 2; - String[] output = TxnDbUtil.queryToString(conf, "SELECT MAX(\"TXN_ID\") + 1 FROM \"TXNS\"").split("\n"); + String[] output = TxnDbUtil.queryToString(conf, "SELECT \"NTXN_NEXT\" FROM \"NEXT_TXN_ID\"").split("\n"); long startTxnId = Long.parseLong(output[1].trim()); List srcTxnIdList = LongStream.rangeClosed(startTxnId, numTxn+startTxnId-1) .boxed().collect(Collectors.toList()); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index 48bf8529fa..2c13e8dd03 100644 --- 
a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -89,7 +89,6 @@ protected HiveConf hiveConf; protected Driver d; - private TxnStore txnHandler; protected enum Table { ACIDTBL("acidTbl"), ACIDTBLPART("acidTblPart", "p"), @@ -152,7 +151,6 @@ void setUpWithTableProperties(String tableProperties) throws Exception { TxnDbUtil.setConfValues(hiveConf); TxnDbUtil.prepDb(hiveConf); - txnHandler = TxnUtils.getTxnStore(hiveConf); File f = new File(TEST_WAREHOUSE_DIR); if (f.exists()) { FileUtil.fullyDelete(f); @@ -324,6 +322,7 @@ public void testOriginalFileReaderWhenNonAcidConvertedToAcid() throws Exception // 3. Perform a major compaction. runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'"); runWorker(hiveConf); + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest()); Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize()); Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState()); @@ -931,6 +930,7 @@ public void updateDeletePartitioned() throws Exception { int[][] tableData = {{1,2},{3,4},{5,6}}; runStatementOnDriver("insert into " + Table.ACIDTBLPART + " partition(p=1) (a,b) " + makeValuesClause(tableData)); runStatementOnDriver("insert into " + Table.ACIDTBLPART + " partition(p=2) (a,b) " + makeValuesClause(tableData)); + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); txnHandler.compact(new CompactionRequest("default", Table.ACIDTBLPART.name(), CompactionType.MAJOR)); runWorker(hiveConf); runCleaner(hiveConf); @@ -953,6 +953,7 @@ public void testEmptyInTblproperties() throws Exception { runStatementOnDriver("update t1" + " set b = -2 where a = 1"); runStatementOnDriver("alter table t1 " + " compact 'MAJOR'"); runWorker(hiveConf); + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); 
ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest()); Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize()); Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState()); @@ -1028,6 +1029,7 @@ void testInitiatorWithMultipleFailedCompactionsForVariousTblProperties(String tb hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEFAILCOMPACTION, true); int numFailedCompactions = hiveConf.getIntVar(HiveConf.ConfVars.COMPACTOR_INITIATOR_FAILED_THRESHOLD); + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); AtomicBoolean stop = new AtomicBoolean(true); //create failed compactions for(int i = 0; i < numFailedCompactions; i++) { @@ -1215,6 +1217,7 @@ private void writeBetweenWorkerAndCleanerForVariousTblProperties(String tblPrope runStatementOnDriver("update " + tblName + " set b = 'blah' where a = 3"); //run Worker to execute compaction + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); txnHandler.compact(new CompactionRequest("default", tblName, CompactionType.MINOR)); runWorker(hiveConf); @@ -1274,6 +1277,7 @@ public void testFailHeartbeater() throws Exception { public void testOpenTxnsCounter() throws Exception { hiveConf.setIntVar(HiveConf.ConfVars.HIVE_MAX_OPEN_TXNS, 3); hiveConf.setTimeVar(HiveConf.ConfVars.HIVE_COUNT_OPEN_TXNS_INTERVAL, 10, TimeUnit.MILLISECONDS); + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); OpenTxnsResponse openTxnsResponse = txnHandler.openTxns(new OpenTxnRequest(3, "me", "localhost")); AcidOpenTxnsCounterService openTxnsCounterService = new AcidOpenTxnsCounterService(); @@ -1315,6 +1319,7 @@ public void testCompactWithDelete() throws Exception { runStatementOnDriver("update " + Table.ACIDTBL + " set b = -2 where b = 2"); runStatementOnDriver("alter table "+ Table.ACIDTBL + " compact 'MINOR'"); runWorker(hiveConf); + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); ShowCompactResponse resp = 
txnHandler.showCompact(new ShowCompactRequest()); Assert.assertEquals("Unexpected number of compactions in history", 2, resp.getCompactsSize()); Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState()); @@ -1375,8 +1380,6 @@ protected void testACIDwithSchemaEvolutionForVariousTblProperties(String tblProp // now compact and see if compaction still preserves the data correctness runStatementOnDriver("alter table "+ tblName + " compact 'MAJOR'"); runWorker(hiveConf); - // create a low water mark aborted transaction and clean the older ones - createAbortLowWaterMark(); runCleaner(hiveConf); // Cleaner would remove the obsolete files. // Verify that there is now only 1 new directory: base_xxxxxxx and the rest have have been cleaned. @@ -1400,14 +1403,6 @@ protected void testACIDwithSchemaEvolutionForVariousTblProperties(String tblProp Assert.assertEquals(Arrays.asList(expectedResult), rs); } - protected void createAbortLowWaterMark() throws Exception{ - hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, true); - runStatementOnDriver("select * from " + Table.ACIDTBL); - // wait for metastore.txn.opentxn.timeout - Thread.sleep(1000); - runInitiator(hiveConf); - } - @Test public void testETLSplitStrategyForACID() throws Exception { hiveConf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, "ETL"); @@ -2012,6 +2007,7 @@ public void testSchemaEvolutionCompaction() throws Exception { Assert.assertEquals("2\t2000\taa", res.get(1)); // Compact + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); CompactionRequest compactionRequest = new CompactionRequest("default", tblName, CompactionType.MAJOR); compactionRequest.setPartitionname("part=aa"); @@ -2058,6 +2054,7 @@ public void testCleanerForTxnToWriteId() throws Exception { Assert.assertEquals(TxnDbUtil.queryToString(hiveConf, "select * from TXN_TO_WRITE_ID" + acidTblPartWhereClause), 2, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from 
TXN_TO_WRITE_ID" + acidTblPartWhereClause)); + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); txnHandler.compact(new CompactionRequest("default", Table.ACIDTBL.name().toLowerCase(), CompactionType.MAJOR)); runWorker(hiveConf); runCleaner(hiveConf); @@ -2099,7 +2096,7 @@ public void testCleanerForTxnToWriteId() throws Exception { // The entry relevant to aborted txns shouldn't be removed from TXN_TO_WRITE_ID as // aborted txn would be removed from TXNS only after the compaction. Also, committed txn > open txn is retained. // As open txn doesn't allocate writeid, the 2 entries for aborted and committed should be retained. - txnHandler.cleanEmptyAbortedAndCommittedTxns(); + txnHandler.cleanEmptyAbortedTxns(); txnHandler.cleanTxnToWriteIdTable(); Assert.assertEquals(TxnDbUtil.queryToString(hiveConf, "select * from TXN_TO_WRITE_ID" + acidTblWhereClause), 3, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from TXN_TO_WRITE_ID" + acidTblWhereClause)); @@ -2112,7 +2109,7 @@ public void testCleanerForTxnToWriteId() throws Exception { txnHandler.compact(new CompactionRequest("default", Table.ACIDTBL.name().toLowerCase(), CompactionType.MAJOR)); runWorker(hiveConf); runCleaner(hiveConf); - txnHandler.cleanEmptyAbortedAndCommittedTxns(); + txnHandler.cleanEmptyAbortedTxns(); txnHandler.cleanTxnToWriteIdTable(); Assert.assertEquals(TxnDbUtil.queryToString(hiveConf, "select * from TXN_TO_WRITE_ID"), 2, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from TXN_TO_WRITE_ID")); @@ -2237,6 +2234,7 @@ public void testDeleteEventsCompaction() throws Exception { runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData2)); runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData3)); + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); txnHandler.compact(new CompactionRequest("default", Table.ACIDTBL.name().toLowerCase(), CompactionType.MINOR)); runWorker(hiveConf); runCleaner(hiveConf); diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java index 5b8c6701e1..51b0fa336f 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java @@ -479,15 +479,11 @@ public void testCompactionAbort() throws Exception { runStatementOnDriver("alter table " + TestTxnCommands2.Table.ACIDTBL + " compact 'MAJOR'"); runWorker(hiveConf); - // Clean committed after TXN_OPENTXN_TIMEOUT, one transaction should always remain - hiveConf.set("metastore.txn.opentxn.timeout", "1"); - runInitiator(hiveConf); - hiveConf.set("metastore.txn.opentxn.timeout", "1000"); - assertOneTxn(); + assertTableIsEmpty("TXNS"); assertTableIsEmpty("TXN_COMPONENTS"); runCleaner(hiveConf); - assertOneTxn(); + assertTableIsEmpty("TXNS"); assertTableIsEmpty("TXN_COMPONENTS"); } @@ -504,14 +500,11 @@ public void testCompactionAbort() throws Exception { runStatementOnDriver("alter table " + TestTxnCommands2.Table.ACIDTBL + " compact 'MAJOR'"); runWorker(hiveConf); - // Clean committed after TXN_OPENTXN_TIMEOUT, one transaction should always remain - hiveConf.set("metastore.txn.opentxn.timeout", "1"); - runInitiator(hiveConf); - hiveConf.set("metastore.txn.opentxn.timeout", "1000"); + assertTableIsEmpty("TXNS"); assertTableIsEmpty("TXN_COMPONENTS"); runCleaner(hiveConf); - assertOneTxn(); + assertTableIsEmpty("TXNS"); assertTableIsEmpty("TXN_COMPONENTS"); } @@ -519,8 +512,4 @@ private void assertTableIsEmpty(String table) throws Exception { Assert.assertEquals(TxnDbUtil.queryToString(hiveConf, "select * from " + table), 0, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from " + table)); } - private void assertOneTxn() throws Exception { - Assert.assertEquals(TxnDbUtil.queryToString(hiveConf, "select * from TXNS"), 1, - TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from TXNS")); - } } diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsForMmTable.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsForMmTable.java index eac2c6307f..6525ffc00a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsForMmTable.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsForMmTable.java @@ -465,11 +465,8 @@ public void testOperationsOnCompletedTxnComponentsForMmTable() throws Exception Assert.assertEquals(TxnDbUtil.queryToString(hiveConf, "select * from COMPLETED_TXN_COMPONENTS"), 2, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from COMPLETED_TXN_COMPONENTS")); - // Clean committed after TXN_OPENTXN_TIMEOUT, one transaction should always remain - Thread.sleep(1000); - runInitiator(hiveConf); Assert.assertEquals(TxnDbUtil.queryToString(hiveConf, "select * from TXNS"), - 1, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from TXNS")); + 0, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from TXNS")); // Initiate a major compaction request on the table. runStatementOnDriver("alter table " + TableExtended.MMTBL + " compact 'MAJOR'"); @@ -478,16 +475,13 @@ public void testOperationsOnCompletedTxnComponentsForMmTable() throws Exception runWorker(hiveConf); verifyDirAndResult(2, true); - // Clean committed after TXN_OPENTXN_TIMEOUT, one transaction should always remain - Thread.sleep(1000); - runInitiator(hiveConf); // Run Cleaner. 
runCleaner(hiveConf); Assert.assertEquals(TxnDbUtil.queryToString(hiveConf, "select * from COMPLETED_TXN_COMPONENTS"), 0, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from COMPLETED_TXN_COMPONENTS")); Assert.assertEquals(TxnDbUtil.queryToString(hiveConf, "select * from TXNS"), - 1, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from TXNS")); + 0, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from TXNS")); verifyDirAndResult(0, true); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index 3ff68a3c7e..1435269ed3 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -34,8 +34,6 @@ import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; -import org.apache.hadoop.hive.metastore.txn.TxnStore; -import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.processors.CommandProcessorException; import org.apache.hadoop.hive.ql.session.SessionState; @@ -59,8 +57,6 @@ public TestName testName = new TestName(); protected HiveConf hiveConf; Driver d; - private TxnStore txnHandler; - public enum Table { ACIDTBL("acidTbl"), ACIDTBLPART("acidTblPart"), @@ -79,10 +75,6 @@ public String toString() { } } - public TxnStore getTxnStore() { - return txnHandler; - } - @Before public void setUp() throws Exception { setUpInternal(); @@ -114,7 +106,6 @@ void setUpInternal() throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false); hiveConf.setBoolean("mapred.input.dir.recursive", true); TxnDbUtil.setConfValues(hiveConf); - txnHandler = TxnUtils.getTxnStore(hiveConf); TxnDbUtil.prepDb(hiveConf); File f = new File(getWarehouseDir()); if (f.exists()) { diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java index 3c0a7eb0b3..8a0606b7a3 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java @@ -32,18 +32,14 @@ import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.io.IOContextMap; -import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin; import org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile; -import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory; -import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.CollectDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; @@ -53,7 +49,6 @@ import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -530,130 +525,4 @@ public void testLlapMemoryOversubscriptionMaxExecutorsPerQueryCalculation() { assertEquals(5, convertJoinMapJoin.getMemoryMonitorInfo(hiveConf, 
null).getMaxExecutorsOverSubscribeMemory()); } - - @Test public void testHashGroupBy() throws HiveException { - InspectableObject[] input = constructHashAggrInputData(5, 3); - System.out.println("---------------Begin to Construct Groupby Desc-------------"); - // 1. Build AggregationDesc - String aggregate = "MAX"; - ExprNodeDesc inputColumn = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col0", "table", false); - ArrayList params = new ArrayList(); - params.add(inputColumn); - GenericUDAFEvaluator genericUDAFEvaluator = - SemanticAnalyzer.getGenericUDAFEvaluator(aggregate, params, null, false, false); - AggregationDesc agg = - new AggregationDesc(aggregate, genericUDAFEvaluator, params, false, GenericUDAFEvaluator.Mode.PARTIAL1); - ArrayList aggs = new ArrayList(); - aggs.add(agg); - - // 2. aggr keys - ExprNodeDesc key1 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col1", "table", false); - ExprNodeDesc key2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col2", "table", false); - ArrayList keys = new ArrayList<>(); - keys.add(key1); - keys.add(key2); - - // 3. outputCols - // @see org.apache.hadoop.hive.ql.exec.GroupByOperator.forward - // outputColumnNames, including: group by keys, agg evaluators output cols. - ArrayList outputColumnNames = new ArrayList(); - for (int i = 0; i < keys.size() + aggs.size(); i++) { - outputColumnNames.add("_col" + i); - } - // 4. build GroupByDesc desc - GroupByDesc desc = new GroupByDesc(); - desc.setOutputColumnNames(outputColumnNames); - desc.setAggregators(aggs); - desc.setKeys(keys); - desc.setMode(GroupByDesc.Mode.HASH); - desc.setMemoryThreshold(1.0f); - desc.setGroupByMemoryUsage(1.0f); - // minReductionHashAggr - desc.setMinReductionHashAggr(0.5f); - - // 5. Configure hive conf and Build group by operator - HiveConf hconf = new HiveConf(); - HiveConf.setIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL, 1); - - // 6. 
test hash aggr without grouping sets - System.out.println("---------------Begin to test hash group by without grouping sets-------------"); - int withoutGroupingSetsExpectSize = 3; - GroupByOperator op = new GroupByOperator(new CompilationOpContext()); - op.setConf(desc); - testHashAggr(op, hconf, input, withoutGroupingSetsExpectSize); - - // 7. test hash aggr with grouping sets - System.out.println("---------------Begin to test hash group by with grouping sets------------"); - int groupingSetsExpectSize = 6; - - desc.setGroupingSetsPresent(true); - ArrayList groupingSets = new ArrayList<>(); - // groupingSets - groupingSets.add(1L); - groupingSets.add(2L); - desc.setListGroupingSets(groupingSets); - // add grouping sets dummy key - ExprNodeDesc groupingSetDummyKey = new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 0L); - keys.add(groupingSetDummyKey); - desc.setKeys(keys); - // groupingSet Position - desc.setGroupingSetPosition(2); - op = new GroupByOperator(new CompilationOpContext()); - op.setConf(desc); - testHashAggr(op, hconf, input, groupingSetsExpectSize); - } - - private void testHashAggr(GroupByOperator op, HiveConf hconf, InspectableObject[] r, int expectOutputSize) - throws HiveException { - // 1. Collect operator to observe the output of the group by operator - CollectDesc cd = new CollectDesc(expectOutputSize + 10); - CollectOperator cdop = (CollectOperator) OperatorFactory.getAndMakeChild(cd, op); - op.initialize(hconf, new ObjectInspector[] { r[0].oi }); - // 2. Evaluate on rows and check hashAggr flag - for (int i = 0; i < r.length; i++) { - op.process(r[i].o, 0); - } - op.close(false); - InspectableObject io = new InspectableObject(); - int output = 0; - // 3. Print group by results - do { - cdop.retrieve(io); - if (io.o != null) { - System.out.println("io.o = " + io.o); - output++; - } - } while (io.o != null); - // 4. 
Check partial result size - assertEquals(expectOutputSize, output); - } - - private InspectableObject[] constructHashAggrInputData(int rowNum, int rowNumWithSameKeys) { - InspectableObject[] r; - r = new InspectableObject[rowNum]; - ArrayList names = new ArrayList(3); - names.add("col0"); - names.add("col1"); - names.add("col2"); - ArrayList objectInspectors = new ArrayList(3); - objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); - objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); - objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); - // 3 rows with the same col1, col2 - for (int i = 0; i < rowNum; i++) { - ArrayList data = new ArrayList(); - data.add("" + i); - data.add("" + (i < rowNumWithSameKeys ? -1 : i)); - data.add("" + (i < rowNumWithSameKeys ? -1 : i)); - try { - r[i] = new InspectableObject(); - r[i].o = data; - r[i].oi = ObjectInspectorFactory.getStandardStructObjectInspector(names, objectInspectors); - } catch (Throwable e) { - throw new RuntimeException(e); - } - } - return r; - } - } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java index c59cc095fc..0bda6203d1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.ql.io.filter; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.junit.Assert; import org.junit.Test; @@ -44,7 +43,7 @@ @Test public void testInitFilterContext(){ - MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0); + MutableFilterContext mutableFilterContext = new MutableFilterContext(); int[] selected = makeValidSelected(); mutableFilterContext.setFilterContext(true, selected, selected.length); @@ -58,7 +57,7 @@ public void 
testInitFilterContext(){ @Test public void testResetFilterContext(){ - MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0); + MutableFilterContext mutableFilterContext = new MutableFilterContext(); int[] selected = makeValidSelected(); mutableFilterContext.setFilterContext(true, selected, selected.length); @@ -68,32 +67,55 @@ public void testResetFilterContext(){ Assert.assertEquals(512, filterContext.getSelectedSize()); Assert.assertEquals(512, filterContext.getSelected().length); - filterContext.reset(); + filterContext.resetFilterContext(); Assert.assertEquals(false, filterContext.isSelectedInUse()); Assert.assertEquals(0, filterContext.getSelectedSize()); + Assert.assertEquals(null, filterContext.getSelected()); } @Test(expected=AssertionError.class) public void testInitInvalidFilterContext(){ - MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0); + MutableFilterContext mutableFilterContext = new MutableFilterContext(); int[] selected = makeInvalidSelected(); mutableFilterContext.setFilterContext(true, selected, selected.length); } + + @Test + public void testCopyFilterContext(){ + MutableFilterContext mutableFilterContext = new MutableFilterContext(); + int[] selected = makeValidSelected(); + + mutableFilterContext.setFilterContext(true, selected, selected.length); + + MutableFilterContext mutableFilterContextToCopy = new MutableFilterContext(); + mutableFilterContextToCopy.setFilterContext(true, new int[] {100}, 1); + + mutableFilterContext.copyFilterContextFrom(mutableFilterContextToCopy); + FilterContext filterContext = mutableFilterContext.immutable(); + + Assert.assertEquals(true, filterContext.isSelectedInUse()); + Assert.assertEquals(1, filterContext.getSelectedSize()); + Assert.assertEquals(100, filterContext.getSelected()[0]); + // make sure we kept the remaining array space + Assert.assertEquals(512, filterContext.getSelected().length); + } + + @Test public void testBorrowSelected(){ - MutableFilterContext 
mutableFilterContext = new VectorizedRowBatch(0); + MutableFilterContext mutableFilterContext = new MutableFilterContext(); mutableFilterContext.setFilterContext(true, new int[] {100, 200}, 2); - int[] borrowedSelected = mutableFilterContext.updateSelected(1); + int[] borrowedSelected = mutableFilterContext.borrowSelected(1); // make sure we borrowed the existing array Assert.assertEquals(2, borrowedSelected.length); Assert.assertEquals(100, borrowedSelected[0]); Assert.assertEquals(200, borrowedSelected[1]); - borrowedSelected = mutableFilterContext.updateSelected(3); + borrowedSelected = mutableFilterContext.borrowSelected(3); Assert.assertEquals(3, borrowedSelected.length); Assert.assertEquals(0, borrowedSelected[0]); Assert.assertEquals(0, borrowedSelected[1]); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java index 74e2495269..be4c880812 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java @@ -21,8 +21,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; import java.time.ZoneId; import org.apache.hadoop.hive.common.type.Timestamp; @@ -110,33 +108,6 @@ public void testGetDecimalConverterDoubleHiveType() throws Exception { assertEquals(22, (int) doubleWritable.get()); } - @Test - public void testGetSmallBigIntConverter() { - Timestamp timestamp = Timestamp.valueOf("1998-10-03 09:58:31.231"); - long msTime = timestamp.toEpochMilli(); - ByteBuffer buf = ByteBuffer.allocate(12); - buf.order(ByteOrder.LITTLE_ENDIAN); - buf.putLong(msTime); - buf.flip(); - // Need TimeStamp logicalType annotation here - PrimitiveType primitiveType = createInt64TimestampType(false, TimeUnit.MILLIS); - Writable writable = 
getWritableFromBinaryConverter(createHiveTypeInfo("bigint"), primitiveType, Binary.fromByteBuffer(buf)); - // Retrieve as BigInt - LongWritable longWritable = (LongWritable) writable; - assertEquals(msTime, longWritable.get()); - } - - @Test - public void testGetBigIntConverter() { - Timestamp timestamp = Timestamp.valueOf("1998-10-03 09:58:31.231"); - NanoTime nanoTime = NanoTimeUtils.getNanoTime(timestamp, true); - PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT96).named("value"); - Writable writable = getWritableFromBinaryConverter(createHiveTypeInfo("bigint"), primitiveType, nanoTime.toBinary()); - // Retrieve as BigInt - LongWritable longWritable = (LongWritable) writable; - assertEquals(nanoTime.getTimeOfDayNanos(), longWritable.get()); - } - @Test public void testGetTimestampConverter() throws Exception { Timestamp timestamp = Timestamp.valueOf("2018-06-15 15:12:20.0"); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java deleted file mode 100644 index b435e79c3c..0000000000 --- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.lockmgr; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; -import org.apache.hadoop.hive.metastore.txn.TxnStore; -import org.apache.hadoop.hive.metastore.txn.TxnUtils; -import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.QueryState; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; - -/** - * Base class for "end-to-end" tests for DbTxnManager and simulate concurrent queries. - */ -public abstract class DbTxnManagerEndToEndTestBase { - - protected static HiveConf conf = new HiveConf(Driver.class); - protected HiveTxnManager txnMgr; - protected Context ctx; - protected Driver driver, driver2; - protected TxnStore txnHandler; - - public DbTxnManagerEndToEndTestBase() { - conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, - "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); - conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); - TxnDbUtil.setConfValues(conf); - } - @BeforeClass - public static void setUpDB() throws Exception{ - TxnDbUtil.prepDb(conf); - } - - @Before - public void setUp() throws Exception { - conf.setBoolVar(HiveConf.ConfVars.TXN_WRITE_X_LOCK, false); - SessionState.start(conf); - ctx = new Context(conf); - driver = new Driver(new QueryState.Builder().withHiveConf(conf).nonIsolated().build()); - driver2 = new Driver(new QueryState.Builder().withHiveConf(conf).build()); - TxnDbUtil.cleanDb(conf); - SessionState ss = SessionState.get(); - ss.initTxnMgr(conf); - txnMgr = ss.getTxnMgr(); - Assert.assertTrue(txnMgr instanceof DbTxnManager); - txnHandler = TxnUtils.getTxnStore(conf); - - } - @After - public void 
tearDown() throws Exception { - driver.close(); - driver2.close(); - if (txnMgr != null) { - txnMgr.closeTxnManager(); - } - } -} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/ITestDbTxnManager.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/ITestDbTxnManager.java index 12cbc2a417..a085e9ff6f 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/ITestDbTxnManager.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/ITestDbTxnManager.java @@ -62,7 +62,8 @@ public static void setupDb() throws Exception { MetastoreConf.getVar(conf, MetastoreConf.ConfVars.CONNECT_URL_KEY)); // Start the docker container and create the hive user rule.before(); - rule.install(); + rule.createUser(); + // We do not run the install script, it will be called anyway before every test in prepDb } @AfterClass diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java index 1687425bcb..f90396b2a3 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java @@ -32,14 +32,22 @@ import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.txn.AcidWriteSetService; +import org.apache.hadoop.hive.metastore.txn.TxnStore; +import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.ql.TestTxnCommands2; +import org.junit.After; import org.junit.Assert; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.processors.CommandProcessorException; import 
org.apache.hadoop.hive.ql.session.SessionState; +import org.junit.Before; +import org.junit.BeforeClass; import org.junit.ComparisonFailure; import org.junit.Rule; import org.junit.Test; @@ -74,7 +82,47 @@ * using {@link #swapTxnManager(HiveTxnManager)} since in the SessionState the TM is associated with * each thread. */ -public class TestDbTxnManager2 extends DbTxnManagerEndToEndTestBase{ +public class TestDbTxnManager2 { + protected static HiveConf conf = new HiveConf(Driver.class); + + private HiveTxnManager txnMgr; + private Context ctx; + private Driver driver; + private TxnStore txnHandler; + + public TestDbTxnManager2() { + conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, + "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); + conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); + TxnDbUtil.setConfValues(conf); + } + + @BeforeClass + public static void setUpDB() throws Exception{ + TxnDbUtil.prepDb(conf); + } + + @Before + public void setUp() throws Exception { + conf.setBoolVar(HiveConf.ConfVars.TXN_WRITE_X_LOCK, false); + SessionState.start(conf); + ctx = new Context(conf); + driver = new Driver(new QueryState.Builder().withHiveConf(conf).nonIsolated().build()); + TxnDbUtil.cleanDb(conf); + SessionState ss = SessionState.get(); + ss.initTxnMgr(conf); + txnMgr = ss.getTxnMgr(); + Assert.assertTrue(txnMgr instanceof DbTxnManager); + txnHandler = TxnUtils.getTxnStore(conf); + } + + @After + public void tearDown() throws Exception { + driver.close(); + if (txnMgr != null) { + txnMgr.closeTxnManager(); + } + } /** * HIVE-16688 @@ -1189,13 +1237,6 @@ public void testWriteSetTracking4() throws Exception { locks = getLocks(txnMgr); Assert.assertEquals("Unexpected lock count", 0, locks.size()); - /** - * The last transaction will always remain in the transaction table, so we will open an other one, - * wait for the timeout period to exceed, then start the initiator that will clean - */ - 
txnMgr.openTxn(ctx, "Long Running"); - Thread.sleep(txnHandler.getOpenTxnTimeOutMillis()); - // Now we can clean the write_set houseKeeper.run(); Assert.assertEquals(0, TxnDbUtil.countQueryAgent(conf, "select count(*) from \"WRITE_SET\"")); } @@ -1270,13 +1311,6 @@ public void testWriteSetTracking6() throws Exception { Assert.assertEquals("Unexpected lock count", 1, locks.size()); checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "TAB2", null, locks); txnMgr.commitTxn(); - /* - * The last transaction will always remain in the transaction table, so we will open an other one, - * wait for the timeout period to exceed, then start the initiator that will clean - */ - txnMgr.openTxn(ctx, "Long Running"); - Thread.sleep(txnHandler.getOpenTxnTimeOutMillis()); - // Now we can clean the write_set MetastoreTaskThread writeSetService = new AcidWriteSetService(); writeSetService.setConf(conf); writeSetService.run(); @@ -2495,17 +2529,7 @@ public void testShowTablesLock() throws Exception { @Test public void testFairness() throws Exception { - testFairness(false); - } - - @Test - public void testFairnessZeroWaitRead() throws Exception { - testFairness(true); - } - - private void testFairness(boolean zeroWaitRead) throws Exception { - dropTable(new String[]{"T6"}); - conf.setBoolVar(HiveConf.ConfVars.TXN_WRITE_X_LOCK, !zeroWaitRead); + dropTable(new String[] {"T6"}); driver.run("create table if not exists T6(a int)"); driver.compileAndRespond("select a from T6", true); txnMgr.acquireLocks(driver.getPlan(), ctx, "Fifer"); //gets S lock on T6 @@ -2517,30 +2541,18 @@ private void testFairness(boolean zeroWaitRead) throws Exception { List locks = getLocks(); Assert.assertEquals("Unexpected lock count", 2, locks.size()); checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T6", null, locks); - long extLockId = checkLock(LockType.EXCLUSIVE, LockState.WAITING, "default", "T6", null, locks).getLockid(); + checkLock(LockType.EXCLUSIVE, LockState.WAITING, 
"default", "T6", null, locks); HiveTxnManager txnMgr3 = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf); swapTxnManager(txnMgr3); //this should block behind the X lock on T6 //this is a contrived example, in practice this query would of course fail after drop table driver.compileAndRespond("select a from T6", true); - try { - ((DbTxnManager) txnMgr3).acquireLocks(driver.getPlan(), ctx, "Fifer", false); //gets S lock on T6 - } catch (LockException ex) { - Assert.assertTrue(zeroWaitRead); - Assert.assertEquals("Exception msg didn't match", - ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg() + " LockResponse(lockid:" + (extLockId + 1) + - ", state:NOT_ACQUIRED, errorMessage:Unable to acquire read lock due to an exclusive lock" + - " {lockid:" + extLockId + " intLockId:1 txnid:" + txnMgr2.getCurrentTxnId() + - " db:default table:t6 partition:null state:WAITING type:EXCLUSIVE})", - ex.getMessage()); - } + ((DbTxnManager)txnMgr3).acquireLocks(driver.getPlan(), ctx, "Fifer", false); //gets S lock on T6 locks = getLocks(); - Assert.assertEquals("Unexpected lock count", (zeroWaitRead ? 
2 : 3), locks.size()); + Assert.assertEquals("Unexpected lock count", 3, locks.size()); checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T6", null, locks); - if (!zeroWaitRead) { - checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T6", null, locks); - } + checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T6", null, locks); checkLock(LockType.EXCLUSIVE, LockState.WAITING, "default", "T6", null, locks); } @@ -2556,17 +2568,7 @@ private void testFairness(boolean zeroWaitRead) throws Exception { */ @Test public void testFairness2() throws Exception { - testFairness2(false); - } - - @Test - public void testFairness2ZeroWaitRead() throws Exception { - testFairness2(true); - } - - private void testFairness2(boolean zeroWaitRead) throws Exception { dropTable(new String[]{"T7"}); - conf.setBoolVar(HiveConf.ConfVars.TXN_WRITE_X_LOCK, !zeroWaitRead); driver.run("create table if not exists T7 (a int) " + "partitioned by (p int) stored as orc TBLPROPERTIES ('transactional'='true')"); driver.run("insert into T7 partition(p) values(1,1),(1,2)"); //create 2 partitions @@ -2582,56 +2584,42 @@ private void testFairness2(boolean zeroWaitRead) throws Exception { checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T7", null, locks); checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T7", "p=1", locks); checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T7", "p=2", locks); - long extLockId = checkLock(LockType.EXCLUSIVE, LockState.WAITING, "default", "T7", "p=1", locks).getLockid(); + checkLock(LockType.EXCLUSIVE, LockState.WAITING, "default", "T7", "p=1", locks); HiveTxnManager txnMgr3 = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf); swapTxnManager(txnMgr3); //this should block behind the X lock on T7.p=1 driver.compileAndRespond("select a from T7", true); //tries to get S lock on T7, S on T7.p=1 and S on T7.p=2 - try { - ((DbTxnManager) txnMgr3).acquireLocks(driver.getPlan(), ctx, "Fifer", 
false); - } catch (LockException ex) { - Assert.assertTrue(zeroWaitRead); - Assert.assertEquals("Exception msg didn't match", - ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg() + " LockResponse(lockid:" + (extLockId + 1) + - ", state:NOT_ACQUIRED, errorMessage:Unable to acquire read lock due to an exclusive lock" + - " {lockid:" + extLockId + " intLockId:1 txnid:" + txnMgr2.getCurrentTxnId() + - " db:default table:t7 partition:p=1 state:WAITING type:EXCLUSIVE})", - ex.getMessage()); - } + ((DbTxnManager)txnMgr3).acquireLocks(driver.getPlan(), ctx, "Fifer", false); locks = getLocks(); - Assert.assertEquals("Unexpected lock count", (zeroWaitRead ? 4 : 7), locks.size()); + Assert.assertEquals("Unexpected lock count", 7, locks.size()); checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T7", null, locks); checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T7", "p=1", locks); checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T7", "p=2", locks); - if (!zeroWaitRead) { - checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T7", null, locks); - checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T7", "p=1", locks); - checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T7", "p=2", locks); - } + checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T7", null, locks); + checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T7", "p=1", locks); + checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T7", "p=2", locks); checkLock(LockType.EXCLUSIVE, LockState.WAITING, "default", "T7", "p=1", locks); txnMgr.commitTxn(); //release locks from "select a from T7" - to unblock hte drop partition //retest the the "drop partiton" X lock - ((DbLockManager)txnMgr2.getLockManager()).checkLock(locks.get(zeroWaitRead ? 3 : 6).getLockid()); + ((DbLockManager)txnMgr2.getLockManager()).checkLock(locks.get(6).getLockid()); locks = getLocks(); - Assert.assertEquals("Unexpected lock count", (zeroWaitRead ? 
1 : 4), locks.size()); + Assert.assertEquals("Unexpected lock count", 4, locks.size()); checkLock(LockType.EXCLUSIVE, LockState.ACQUIRED, "default", "T7", "p=1", locks); - if (!zeroWaitRead) { - checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T7", null, locks); - checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T7", "p=1", locks); - checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T7", "p=2", locks); - - txnMgr2.rollbackTxn(); //release the X lock on T7.p=1 - //re-test the locks - ((DbLockManager) txnMgr2.getLockManager()).checkLock(locks.get(1).getLockid()); //S lock on T7 - locks = getLocks(); - Assert.assertEquals("Unexpected lock count", 3, locks.size()); - checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T7", null, locks); - checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T7", "p=1", locks); - checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T7", "p=2", locks); - } + checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T7", null, locks); + checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T7", "p=1", locks); + checkLock(LockType.SHARED_READ, LockState.WAITING, "default", "T7", "p=2", locks); + + txnMgr2.rollbackTxn(); //release the X lock on T7.p=1 + //re-test the locks + ((DbLockManager)txnMgr2.getLockManager()).checkLock(locks.get(1).getLockid()); //S lock on T7 + locks = getLocks(); + Assert.assertEquals("Unexpected lock count", 3, locks.size()); + checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T7", null, locks); + checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T7", "p=1", locks); + checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T7", "p=2", locks); } @Test diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManagerIsolationProperties.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManagerIsolationProperties.java deleted file mode 100644 index 6c300695af..0000000000 
--- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManagerIsolationProperties.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.lockmgr; - -import org.apache.hadoop.hive.common.ValidTxnList; -import org.apache.hadoop.hive.metastore.api.CommitTxnRequest; -import org.apache.hadoop.hive.metastore.api.OpenTxnRequest; -import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse; -import org.apache.hadoop.hive.metastore.datasource.DataSourceProvider; -import org.apache.hadoop.hive.metastore.datasource.DataSourceProviderFactory; -import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.junit.Assert; -import org.junit.Test; - -import javax.sql.DataSource; -import java.sql.Connection; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.List; - -/** - * Tests to check the ACID properties and isolation level requirements. 
- */ -public class TestDbTxnManagerIsolationProperties extends DbTxnManagerEndToEndTestBase { - - @Test - public void basicOpenTxnsNoDirtyRead() throws Exception { - driver.run(("drop table if exists gap")); - driver.run("create table gap (a int, b int) " + "stored as orc TBLPROPERTIES ('transactional'='true')"); - // Create one TXN to read and do not run it - driver.compileAndRespond("select * from gap"); - long first = txnMgr.getCurrentTxnId(); - - DbTxnManager txnMgr2 = (DbTxnManager) TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf); - swapTxnManager(txnMgr2); - driver2.compileAndRespond("insert into gap values(1,2)"); - long second = txnMgr2.getCurrentTxnId(); - Assert.assertTrue("Sequence number goes onward", second > first); - driver2.run(); - - // Now we run our read query it should not see the write results of the insert - swapTxnManager(txnMgr); - driver.run(); - - FetchTask fetchTask = driver.getFetchTask(); - List res = new ArrayList(); - fetchTask.fetch(res); - Assert.assertEquals("No dirty read", 0, res.size()); - - } - @Test - public void gapOpenTxnsNoDirtyRead() throws Exception { - driver.run(("drop table if exists gap")); - driver.run("create table gap (a int, b int) " + "stored as orc TBLPROPERTIES ('transactional'='true')"); - // Create one TXN to delete later - driver.compileAndRespond("select * from gap"); - long first = txnMgr.getCurrentTxnId(); - driver.run(); - // The second one we use for Low water mark - driver.run("select * from gap"); - DbTxnManager txnMgr2 = (DbTxnManager) TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf); - swapTxnManager(txnMgr2); - // Make sure, that the time window is great enough to consider the gap open - txnHandler.setOpenTxnTimeOutMillis(30000); - // Create a gap - deleteTransactionId(first); - CommandProcessorResponse resp = driver2.compileAndRespond("select * from gap"); - long third = txnMgr2.getCurrentTxnId(); - Assert.assertTrue("Sequence number goes onward", third > first); - 
ValidTxnList validTxns = txnMgr2.getValidTxns(); - Assert.assertEquals("Expect to see the gap as open", first, (long) validTxns.getMinOpenTxn()); - txnHandler.setOpenTxnTimeOutMillis(1000); - - // Now we cheat and create a transaction with the first sequenceId again imitating a very slow openTxns call - setBackSequence(first); - swapTxnManager(txnMgr); - driver.compileAndRespond("insert into gap values(1,2)"); - long forth = txnMgr.getCurrentTxnId(); - Assert.assertEquals(first, forth); - driver.run(); - - // Now we run our read query it should not see the write results of the insert - swapTxnManager(txnMgr2); - driver2.run(); - - FetchTask fetchTask = driver2.getFetchTask(); - List res = new ArrayList(); - fetchTask.fetch(res); - Assert.assertEquals("No dirty read", 0, res.size()); - - } - - - - @Test - public void multipleGapOpenTxnsNoDirtyRead() throws Exception { - driver.run(("drop table if exists gap")); - driver.run("create table gap (a int, b int) " + "stored as orc TBLPROPERTIES ('transactional'='true')"); - // Create some TXN to delete later - OpenTxnsResponse openTxns = txnHandler.openTxns(new OpenTxnRequest(10, "user", "local")); - openTxns.getTxn_ids().stream().forEach(txnId -> { - silentCommitTxn(new CommitTxnRequest(txnId)); - }); - - long first = openTxns.getTxn_ids().get(0); - long last = openTxns.getTxn_ids().get(9); - // The next one we use for Low water mark - driver.run("select * from gap"); - DbTxnManager txnMgr2 = (DbTxnManager) TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf); - swapTxnManager(txnMgr2); - // Make sure, that the time window is great enough to consider the gap open - txnHandler.setOpenTxnTimeOutMillis(30000); - // Create a gap - deleteTransactionId(first, last); - CommandProcessorResponse resp = driver2.compileAndRespond("select * from gap"); - long next = txnMgr2.getCurrentTxnId(); - Assert.assertTrue("Sequence number goes onward", next > last); - ValidTxnList validTxns = txnMgr2.getValidTxns(); - 
Assert.assertEquals("Expect to see the gap as open", first, (long) validTxns.getMinOpenTxn()); - txnHandler.setOpenTxnTimeOutMillis(1000); - - // Now we cheat and create a transaction with the first sequenceId again imitating a very slow openTxns call - setBackSequence(first); - swapTxnManager(txnMgr); - driver.compileAndRespond("insert into gap values(1,2)"); - next = txnMgr.getCurrentTxnId(); - Assert.assertEquals(first, next); - driver.run(); - - // Now we run our read query it should not see the write results of the insert - swapTxnManager(txnMgr2); - driver2.run(); - - FetchTask fetchTask = driver2.getFetchTask(); - List res = new ArrayList(); - fetchTask.fetch(res); - Assert.assertEquals("No dirty read", 0, res.size()); - - } - - @Test - public void gapOpenTxnsDirtyRead() throws Exception { - driver.run(("drop table if exists gap")); - driver.run("create table gap (a int, b int) " + "stored as orc TBLPROPERTIES ('transactional'='true')"); - // Create one TXN to delete later - driver.compileAndRespond("select * from gap"); - long first = txnMgr.getCurrentTxnId(); - driver.run(); - //The second one we use for Low water mark - driver.run("select * from gap"); - DbTxnManager txnMgr2 = (DbTxnManager) TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf); - swapTxnManager(txnMgr2); - // Now we wait for the time window to move forward - Thread.sleep(txnHandler.getOpenTxnTimeOutMillis()); - // Create a gap - deleteTransactionId(first); - CommandProcessorResponse resp = driver2.compileAndRespond("select * from gap"); - long third = txnMgr2.getCurrentTxnId(); - Assert.assertTrue("Sequence number goes onward", third > first); - ValidTxnList validTxns = txnMgr2.getValidTxns(); - Assert.assertNull("Expect to see no gap", validTxns.getMinOpenTxn()); - - // Now we cheat and create a transaction with the first sequenceId again imitating a very slow openTxns call - // This should never happen - setBackSequence(first); - swapTxnManager(txnMgr); - 
driver.compileAndRespond("insert into gap values(1,2)"); - long forth = txnMgr.getCurrentTxnId(); - Assert.assertEquals(first, forth); - driver.run(); - - // Now we run our read query it should unfortunately see the results of the insert - swapTxnManager(txnMgr2); - driver2.run(); - - FetchTask fetchTask = driver2.getFetchTask(); - List res = new ArrayList(); - fetchTask.fetch(res); - Assert.assertEquals("Dirty read!", 1, res.size()); - - } - private void silentCommitTxn(CommitTxnRequest commitTxnRequest) { - try { - txnHandler.commitTxn(commitTxnRequest); - } catch (Exception ex) { - throw new RuntimeException(ex); - } - } - - private void deleteTransactionId(long txnId) throws SQLException { - deleteTransactionId(txnId, txnId); - } - - private void deleteTransactionId(long minTxnId, long maxTxnId) throws SQLException { - DataSourceProvider dsp = DataSourceProviderFactory.tryGetDataSourceProviderOrNull(conf); - DataSource ds = dsp.create(conf); - Connection dbConn = ds.getConnection(); - Statement stmt = dbConn.createStatement(); - stmt.executeUpdate("DELETE FROM TXNS WHERE TXN_ID >=" + minTxnId + " AND TXN_ID <=" + maxTxnId); - dbConn.commit(); - stmt.close(); - dbConn.close(); - } - - private void setBackSequence(long txnId) throws SQLException { - DataSourceProvider dsp = DataSourceProviderFactory.tryGetDataSourceProviderOrNull(conf); - DataSource ds = dsp.create(conf); - Connection dbConn = ds.getConnection(); - Statement stmt = dbConn.createStatement(); - stmt.executeUpdate("ALTER TABLE TXNS ALTER TXN_ID RESTART WITH " + txnId); - dbConn.commit(); - stmt.close(); - dbConn.close(); - } - - public static HiveTxnManager swapTxnManager(HiveTxnManager txnMgr) { - return SessionState.get().setTxnMgr(txnMgr); - } -} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java index fbf2b8bb9a..0d39999cc1 100644 --- 
a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.session.SessionState; import org.junit.Assert; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; @@ -40,6 +41,11 @@ private static HiveConf conf = new HiveConf(SemanticAnalyzer.class); private static ParseDriver pd = new ParseDriver(); + @BeforeClass + public static void initialize() { + SessionState.start(conf); + } + private static ASTNode parse(String query) throws ParseException { ASTNode nd = null; try { @@ -51,11 +57,6 @@ private static ASTNode parse(String query) throws ParseException { } public static class TestSQL11ReservedKeyWordsNegativeMisc { - @BeforeClass - public static void initialize() { - SessionState.start(conf); - } - @Test public void testSQL11ReservedKeyWords_KILL() { try { @@ -71,10 +72,6 @@ public void testSQL11ReservedKeyWords_KILL() { @RunWith(Parameterized.class) public static class TestSQL11ReservedKeyWordsNegativeParametrized { - @BeforeClass - public static void initialize() { - SessionState.start(conf); - } @Parameters(name = "{0}") public static Collection data() { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestInitiator.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestInitiator.java index e4ff14a140..1151466f8c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestInitiator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestInitiator.java @@ -218,19 +218,18 @@ public void cleanEmptyAbortedTxns() throws Exception { req.setTxnid(txnid); LockResponse res = txnHandler.lock(req); txnHandler.abortTxn(new AbortTxnRequest(txnid)); - txnHandler.setOpenTxnTimeOutMillis(30000); + conf.setIntVar(HiveConf.ConfVars.HIVE_TXN_MAX_OPEN_BATCH, 
TxnStore.TIMED_OUT_TXN_ABORT_BATCH_SIZE + 50); OpenTxnsResponse resp = txnHandler.openTxns(new OpenTxnRequest( TxnStore.TIMED_OUT_TXN_ABORT_BATCH_SIZE + 50, "user", "hostname")); txnHandler.abortTxns(new AbortTxnsRequest(resp.getTxn_ids())); GetOpenTxnsResponse openTxns = txnHandler.getOpenTxns(); Assert.assertEquals(TxnStore.TIMED_OUT_TXN_ABORT_BATCH_SIZE + 50 + 1, openTxns.getOpen_txnsSize()); - txnHandler.setOpenTxnTimeOutMillis(1); + startInitiator(); openTxns = txnHandler.getOpenTxns(); - // txnid:1 has txn_components, txnid:TIMED_OUT_TXN_ABORT_BATCH_SIZE + 50 + 1 is the last - Assert.assertEquals(2, openTxns.getOpen_txnsSize()); + Assert.assertEquals(1, openTxns.getOpen_txnsSize()); } @Test diff --git a/ql/src/test/queries/clientnegative/database_location_conflict.q b/ql/src/test/queries/clientnegative/database_location_conflict.q deleted file mode 100644 index 11f0801ea1..0000000000 --- a/ql/src/test/queries/clientnegative/database_location_conflict.q +++ /dev/null @@ -1,4 +0,0 @@ -CREATE DATABASE db -LOCATION '${hiveconf:hive.metastore.warehouse.dir}/db' -MANAGEDLOCATION '${hiveconf:hive.metastore.warehouse.dir}/db'; - diff --git a/ql/src/test/queries/clientnegative/database_location_conflict2.q b/ql/src/test/queries/clientnegative/database_location_conflict2.q deleted file mode 100644 index 13df728dbd..0000000000 --- a/ql/src/test/queries/clientnegative/database_location_conflict2.q +++ /dev/null @@ -1,5 +0,0 @@ -CREATE DATABASE db -LOCATION '${hiveconf:hive.metastore.warehouse.dir}/db'; - -ALTER DATABASE db SET MANAGEDLOCATION '${hiveconf:hive.metastore.warehouse.dir}/db'; - diff --git a/ql/src/test/queries/clientnegative/database_location_conflict3.q b/ql/src/test/queries/clientnegative/database_location_conflict3.q deleted file mode 100644 index 8de1646204..0000000000 --- a/ql/src/test/queries/clientnegative/database_location_conflict3.q +++ /dev/null @@ -1,4 +0,0 @@ -CREATE DATABASE db -MANAGEDLOCATION '${hiveconf:hive.metastore.warehouse.dir}/db'; - 
-ALTER DATABASE db SET LOCATION '${hiveconf:hive.metastore.warehouse.dir}/db'; diff --git a/ql/src/test/queries/clientpositive/char_serde.q b/ql/src/test/queries/clientpositive/char_serde.q index 99c6980369..3959ba5827 100644 --- a/ql/src/test/queries/clientpositive/char_serde.q +++ b/ql/src/test/queries/clientpositive/char_serde.q @@ -22,8 +22,8 @@ stored as textfile; load data local inpath '../../data/files/srcbucket0.txt' overwrite into table char_serde_regex; -select * from char_serde_regex order by key, value limit 5; -select value, count(*) from char_serde_regex group by value order by value limit 5; +select * from char_serde_regex limit 5; +select value, count(*) from char_serde_regex group by value limit 5; -- -- LazyBinary @@ -36,8 +36,8 @@ alter table char_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.La insert overwrite table char_serde_lb select key, value from char_serde_regex; -select * from char_serde_lb order by key, value limit 5; -select value, count(*) from char_serde_lb group by value order by value limit 5; +select * from char_serde_lb limit 5; +select value, count(*) from char_serde_lb group by value limit 5; -- -- LazySimple @@ -50,8 +50,8 @@ alter table char_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimp insert overwrite table char_serde_ls select key, value from char_serde_lb; -select * from char_serde_ls order by key, value limit 5; -select value, count(*) from char_serde_ls group by value order by value limit 5; +select * from char_serde_ls limit 5; +select value, count(*) from char_serde_ls group by value limit 5; -- -- Columnar @@ -64,8 +64,8 @@ alter table char_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.Colum insert overwrite table char_serde_c select key, value from char_serde_ls; -select * from char_serde_c order by key, value limit 5; -select value, count(*) from char_serde_c group by value order by value limit 5; +select * from char_serde_c limit 5; +select value, count(*) from 
char_serde_c group by value limit 5; -- -- LazyBinaryColumnar @@ -78,8 +78,8 @@ alter table char_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.Laz insert overwrite table char_serde_lbc select key, value from char_serde_c; -select * from char_serde_lbc order by key, value limit 5; -select value, count(*) from char_serde_lbc group by value order by value limit 5; +select * from char_serde_lbc limit 5; +select value, count(*) from char_serde_lbc group by value limit 5; -- -- ORC @@ -93,8 +93,8 @@ alter table char_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' insert overwrite table char_serde_orc select key, value from char_serde_lbc; -select * from char_serde_orc order by key, value limit 5; -select value, count(*) from char_serde_orc group by value order by value limit 5; +select * from char_serde_orc limit 5; +select value, count(*) from char_serde_orc group by value limit 5; drop table if exists char_serde_regex; drop table if exists char_serde_lb; diff --git a/ql/src/test/queries/clientpositive/database_location.q b/ql/src/test/queries/clientpositive/database_location.q index af55a80b9b..8571958c29 100644 --- a/ql/src/test/queries/clientpositive/database_location.q +++ b/ql/src/test/queries/clientpositive/database_location.q @@ -18,30 +18,3 @@ CREATE TABLE table_db2 (name STRING, value INT); DESCRIBE FORMATTED table_db2; SHOW TABLES; - -EXPLAIN -CREATE DATABASE db3 -LOCATION '${hiveconf:hive.metastore.warehouse.dir}/db3_ext' -MANAGEDLOCATION '${hiveconf:hive.metastore.warehouse.dir}/db3'; - -CREATE DATABASE db3 -LOCATION '${hiveconf:hive.metastore.warehouse.dir}/db3_ext' -MANAGEDLOCATION '${hiveconf:hive.metastore.warehouse.dir}/db3'; - -DESCRIBE DATABASE db3; - -EXPLAIN -ALTER DATABASE db3 SET LOCATION '${hiveconf:hive.metastore.warehouse.dir}/db3_ext_alt'; - -ALTER DATABASE db3 SET LOCATION '${hiveconf:hive.metastore.warehouse.dir}/db3_ext_alt'; - -DESCRIBE DATABASE db3; - -EXPLAIN -ALTER DATABASE db3 SET MANAGEDLOCATION 
'${hiveconf:hive.metastore.warehouse.dir}/db3_alt'; - -ALTER DATABASE db3 SET MANAGEDLOCATION '${hiveconf:hive.metastore.warehouse.dir}/db3_alt'; - -DESCRIBE DATABASE db3; - - diff --git a/ql/src/test/queries/clientpositive/distinct_windowing.q b/ql/src/test/queries/clientpositive/distinct_windowing.q index 51e2ab50dc..2ce1aca02f 100644 --- a/ql/src/test/queries/clientpositive/distinct_windowing.q +++ b/ql/src/test/queries/clientpositive/distinct_windowing.q @@ -17,43 +17,23 @@ create table over10k_n15( load data local inpath '../../data/files/over10k' into table over10k_n15; - EXPLAIN - SELECT fv - FROM (SELECT distinct first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n15) sq -ORDER BY fv - LIMIT 10; - - SELECT fv - FROM (SELECT distinct first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n15) sq -ORDER BY fv - LIMIT 10; - - EXPLAIN - SELECT lv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv - FROM over10k_n15) sq -ORDER BY lv - LIMIT 10; - - SELECT lv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv - FROM over10k_n15) sq -ORDER BY lv - LIMIT 10; - - EXPLAIN - SELECT lv, fv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv, - first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n15) sq -ORDER BY lv, fv - LIMIT 50; - - SELECT lv, fv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv, - first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n15) sq -ORDER BY lv, fv - LIMIT 50; +explain +select distinct first_value(t) over ( partition by si order by i ) from over10k_n15 limit 10; + +select distinct first_value(t) over ( partition by si order by i ) from over10k_n15 limit 10; + +explain +select distinct last_value(i) over ( partition by si order by i ) +from over10k_n15 limit 10; + +select distinct last_value(i) over ( partition by si order by i ) +from over10k_n15 
limit 10; + +explain +select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k_n15 limit 50; + +select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k_n15 limit 50; diff --git a/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q b/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q index a8d67de749..0748b80792 100644 --- a/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q +++ b/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q @@ -19,69 +19,45 @@ create table over10k_n14( load data local inpath '../../data/files/over10k' into table over10k_n14; - EXPLAIN - SELECT fv - FROM (SELECT distinct first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n14) sq -ORDER BY fv - LIMIT 10; +explain +select distinct first_value(t) over ( partition by si order by i ) from over10k_n14 limit 10; - SELECT fv - FROM (SELECT distinct first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n14) sq -ORDER BY fv - LIMIT 10; +select distinct first_value(t) over ( partition by si order by i ) from over10k_n14 limit 10; - EXPLAIN - SELECT lv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv - FROM over10k_n14) sq -ORDER BY lv - LIMIT 10; +explain +select distinct last_value(i) over ( partition by si order by i ) +from over10k_n14 limit 10; - SELECT lv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv - FROM over10k_n14) sq -ORDER BY lv - LIMIT 10; +select distinct last_value(i) over ( partition by si order by i ) +from over10k_n14 limit 10; - EXPLAIN - SELECT lv, fv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv, - first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n14) sq -ORDER BY lv, fv - LIMIT 50; +explain +select distinct last_value(i) over 
( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k_n14 limit 50; - SELECT lv, fv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv, - first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n14) sq -ORDER BY lv, fv - LIMIT 50; +select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k_n14 limit 50; explain -select si, max(f) mf, rank() over ( partition by si order by mf ) r +select si, max(f) mf, rank() over ( partition by si order by mf ) FROM over10k_n14 GROUP BY si HAVING max(f) > 0 -ORDER BY si, r limit 50; -select si, max(f) mf, rank() over ( partition by si order by mf ) r +select si, max(f) mf, rank() over ( partition by si order by mf ) FROM over10k_n14 GROUP BY si HAVING max(f) > 0 -ORDER BY si, r limit 50; explain -select distinct si, rank() over ( partition by si order by i ) r +select distinct si, rank() over ( partition by si order by i ) FROM over10k_n14 -ORDER BY si, r limit 50; -select distinct si, rank() over ( partition by si order by i ) r +select distinct si, rank() over ( partition by si order by i ) FROM over10k_n14 -ORDER BY si, r limit 50; diff --git a/ql/src/test/queries/clientpositive/gby_star.q b/ql/src/test/queries/clientpositive/gby_star.q index effc622e7b..f1174b92c6 100644 --- a/ql/src/test/queries/clientpositive/gby_star.q +++ b/ql/src/test/queries/clientpositive/gby_star.q @@ -1,20 +1,19 @@ --! 
qt:dataset:src set hive.mapred.mode=nonstrict; - explain -select *, sum(key) from src group by key, value order by key, value limit 10; -select *, sum(key) from src group by key, value order by key, value limit 10; +select *, sum(key) from src group by key, value limit 10; +select *, sum(key) from src group by key, value limit 10; explain -select *, sum(key) from src where key < 100 group by key, value order by key, value limit 10; -select *, sum(key) from src where key < 100 group by key, value order by key, value limit 10; +select *, sum(key) from src where key < 100 group by key, value limit 10; +select *, sum(key) from src where key < 100 group by key, value limit 10; explain -select *, sum(key) from (select key from src where key < 100) a group by key order by key limit 10; -select *, sum(key) from (select key from src where key < 100) a group by key order by key limit 10; +select *, sum(key) from (select key from src where key < 100) a group by key limit 10; +select *, sum(key) from (select key from src where key < 100) a group by key limit 10; explain select a.*, sum(src.key) from (select key from src where key < 100) a -inner join src on a.key = src.key group by a.key order by a.key limit 10; +inner join src on a.key = src.key group by a.key limit 10; select a.*, sum(src.key) from (select key from src where key < 100) a -inner join src on a.key = src.key group by a.key order by a.key limit 10; +inner join src on a.key = src.key group by a.key limit 10; diff --git a/ql/src/test/queries/clientpositive/multi_insert_partitioned.q b/ql/src/test/queries/clientpositive/multi_insert_partitioned.q index 991dc8d5e4..c37fa70fa7 100644 --- a/ql/src/test/queries/clientpositive/multi_insert_partitioned.q +++ b/ql/src/test/queries/clientpositive/multi_insert_partitioned.q @@ -1,5 +1,4 @@ --! 
qt:dataset:src --- SORT_QUERY_RESULTS set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; diff --git a/ql/src/test/queries/clientpositive/parquet_timestampt_to_bigint.q b/ql/src/test/queries/clientpositive/parquet_timestampt_to_bigint.q deleted file mode 100644 index 5aa4ab14fc..0000000000 --- a/ql/src/test/queries/clientpositive/parquet_timestampt_to_bigint.q +++ /dev/null @@ -1,25 +0,0 @@ -set hive.vectorized.execution.enabled=false; -set parquet.column.index.access=true; - --- Test paquet table with Timestamp Col to BigInt convertion -dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/parquet_format_ts; - -DROP TABLE ts_pq; - -CREATE EXTERNAL TABLE ts_pq (ts1 TIMESTAMP) - STORED AS PARQUET - LOCATION '${system:test.tmp.dir}/parquet_format_ts'; - -INSERT INTO ts_pq VALUES ('1998-10-03 09:58:31.231'); - -SELECT * FROM ts_pq; - --- Now use data from another table that uses TS as a BIGINT - -CREATE EXTERNAL TABLE ts_pq_2 (ts2 BIGINT) - STORED AS PARQUET - LOCATION '${system:test.tmp.dir}/parquet_format_ts'; - -SELECT * FROM ts_pq_2; - -dfs -rmr ${system:test.tmp.dir}/parquet_format_ts; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/partition_wise_fileformat15.q b/ql/src/test/queries/clientpositive/partition_wise_fileformat15.q index f908cfda2d..033e1235eb 100644 --- a/ql/src/test/queries/clientpositive/partition_wise_fileformat15.q +++ b/ql/src/test/queries/clientpositive/partition_wise_fileformat15.q @@ -1,5 +1,5 @@ --! 
qt:dataset:src --- SORT_QUERY_RESULTS +--SORT_QUERY_RESULTS set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; -- This tests that the schema can be changed for binary serde data diff --git a/ql/src/test/queries/clientpositive/partition_wise_fileformat16.q b/ql/src/test/queries/clientpositive/partition_wise_fileformat16.q index da3ae0910e..703b21469e 100644 --- a/ql/src/test/queries/clientpositive/partition_wise_fileformat16.q +++ b/ql/src/test/queries/clientpositive/partition_wise_fileformat16.q @@ -1,5 +1,4 @@ --! qt:dataset:src --- SORT_QUERY_RESULTS set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; -- This tests that the schema can be changed for binary serde data diff --git a/ql/src/test/queries/clientpositive/reset_conf.q b/ql/src/test/queries/clientpositive/reset_conf.q index c841ff051b..8420d0225b 100644 --- a/ql/src/test/queries/clientpositive/reset_conf.q +++ b/ql/src/test/queries/clientpositive/reset_conf.q @@ -4,14 +4,11 @@ set hive.skewjoin.key=300000; set hive.skewjoin.mapjoin.min.split=256000000; set hive.skewjoin.key; set hive.skewjoin.mapjoin.min.split; -set hive.query.max.length; reset; set hive.skewjoin.key; set hive.skewjoin.mapjoin.min.split; --- Should not be set back to 10Mb, should keep 100Mb -set hive.query.max.length; set hive.skewjoin.key=300000; set hive.skewjoin.mapjoin.min.split=256000000; @@ -30,8 +27,3 @@ select 'After resetting both to default'; set hive.skewjoin.key; set hive.skewjoin.mapjoin.min.split; --- Double reset to check if the System.property setting is not reverted -reset; - --- Should not be set back to 10Mb, should keep 100Mb -set hive.query.max.length; diff --git a/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q b/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q index 3beea62e30..e1a2054ef0 100644 --- a/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q +++ 
b/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q @@ -1,4 +1,9 @@ ---! qt:transactional + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; set hive.fetch.task.conversion=none; create table sketch_input (id int, category char(1)) diff --git a/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup2.q b/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup2.q deleted file mode 100644 index 39c7ac49af..0000000000 --- a/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup2.q +++ /dev/null @@ -1,54 +0,0 @@ ---! qt:transactional -set hive.fetch.task.conversion=none; - -create table sketch_input (id int, category char(1)) -STORED AS ORC -TBLPROPERTIES ('transactional'='true'); - -insert into table sketch_input values - (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), - (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') -; - --- create an mv for the intermediate results -create materialized view mv_1 as - select category, ds_hll_sketch(id),count(id) from sketch_input group by category; - --- bi mode on -set hive.optimize.bi.enabled=true; - -explain -select 'rewrite; mv matching', category, count(distinct id) from sketch_input group by category; -select 'rewrite; mv matching', category, count(distinct id) from sketch_input group by category; - -set hive.optimize.bi.enabled=false; - -explain -select 'no rewrite; no mv usage', category, count(distinct id) from sketch_input group by category; -select 'no rewrite; no mv usage', category, count(distinct id) from sketch_input group by category; - -set hive.optimize.bi.enabled=true; - -insert into table sketch_input values - (1,'a'),(1, 'a'), (2, 'a'), (3, 
'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), - (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') -; - -explain -select 'rewrite; but no mv usage', category, count(distinct id) from sketch_input group by category; -select 'rewrite; but no mv usage', category, count(distinct id) from sketch_input group by category; - -explain -alter materialized view mv_1 rebuild; -alter materialized view mv_1 rebuild; - -explain -select 'rewrite; mv matching', category, count(distinct id) from sketch_input group by category; -select 'rewrite; mv matching', category, count(distinct id) from sketch_input group by category; - --- rewrite+mv matching with rollup -explain -select 'rewrite;mv matching with rollup',count(distinct id) from sketch_input; -select 'rewrite;mv matching with rollup',count(distinct id) from sketch_input; - -drop materialized view mv_1; diff --git a/ql/src/test/queries/clientpositive/sketches_rewrite.q b/ql/src/test/queries/clientpositive/sketches_rewrite.q deleted file mode 100644 index 0420d62c81..0000000000 --- a/ql/src/test/queries/clientpositive/sketches_rewrite.q +++ /dev/null @@ -1,19 +0,0 @@ ---! 
qt:transactional - -set hive.optimize.bi.enabled=true; - -create table sketch_input (id int, category char(1)) -STORED AS ORC -TBLPROPERTIES ('transactional'='true'); - -insert into table sketch_input values - (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), - (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') -; - --- see if rewrite happens -explain -select category, count(distinct id) from sketch_input group by category; - -select category, count(distinct id) from sketch_input group by category; - diff --git a/ql/src/test/queries/clientpositive/vector_offset_limit_reduce.q b/ql/src/test/queries/clientpositive/vector_offset_limit_reduce.q deleted file mode 100644 index 78cf5db3eb..0000000000 --- a/ql/src/test/queries/clientpositive/vector_offset_limit_reduce.q +++ /dev/null @@ -1,16 +0,0 @@ -set hive.vectorized.execution.reduce.enabled=true; - -CREATE EXTERNAL TABLE orderdatatest_ext (col1 int, col2 int); -INSERT INTO orderdatatest_ext VALUES - (1,1), - (1,2), - (1,3), - (1,4), - (1,5), - (1,6), - (1,7), - (1,8), - (1,9), - (1,10); - -select * from orderdatatest_ext order by col1 limit 5,100 diff --git a/ql/src/test/results/clientnegative/database_location_conflict.q.out b/ql/src/test/results/clientnegative/database_location_conflict.q.out deleted file mode 100644 index 8034185af8..0000000000 --- a/ql/src/test/results/clientnegative/database_location_conflict.q.out +++ /dev/null @@ -1,6 +0,0 @@ -PREHOOK: query: CREATE DATABASE db -#### A masked pattern was here #### -PREHOOK: type: CREATEDATABASE -PREHOOK: Output: database:db -#### A masked pattern was here #### -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.ddl.DDLTask. 
Managed and external locations for database cannot be the same diff --git a/ql/src/test/results/clientnegative/database_location_conflict2.q.out b/ql/src/test/results/clientnegative/database_location_conflict2.q.out deleted file mode 100644 index 80786bf86d..0000000000 --- a/ql/src/test/results/clientnegative/database_location_conflict2.q.out +++ /dev/null @@ -1,14 +0,0 @@ -PREHOOK: query: CREATE DATABASE db -#### A masked pattern was here #### -PREHOOK: type: CREATEDATABASE -PREHOOK: Output: database:db -#### A masked pattern was here #### -POSTHOOK: query: CREATE DATABASE db -#### A masked pattern was here #### -POSTHOOK: type: CREATEDATABASE -POSTHOOK: Output: database:db -#### A masked pattern was here #### -PREHOOK: type: ALTERDATABASE_LOCATION -PREHOOK: Output: database:db -#### A masked pattern was here #### -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.ddl.DDLTask. Managed and external locations for database cannot be the same diff --git a/ql/src/test/results/clientnegative/database_location_conflict3.q.out b/ql/src/test/results/clientnegative/database_location_conflict3.q.out deleted file mode 100644 index 80786bf86d..0000000000 --- a/ql/src/test/results/clientnegative/database_location_conflict3.q.out +++ /dev/null @@ -1,14 +0,0 @@ -PREHOOK: query: CREATE DATABASE db -#### A masked pattern was here #### -PREHOOK: type: CREATEDATABASE -PREHOOK: Output: database:db -#### A masked pattern was here #### -POSTHOOK: query: CREATE DATABASE db -#### A masked pattern was here #### -POSTHOOK: type: CREATEDATABASE -POSTHOOK: Output: database:db -#### A masked pattern was here #### -PREHOOK: type: ALTERDATABASE_LOCATION -PREHOOK: Output: database:db -#### A masked pattern was here #### -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.ddl.DDLTask. 
Managed and external locations for database cannot be the same diff --git a/ql/src/test/results/clientnegative/insert_into1.q.out b/ql/src/test/results/clientnegative/insert_into1.q.out index 0d3439e150..066f04d1cc 100644 --- a/ql/src/test/results/clientnegative/insert_into1.q.out +++ b/ql/src/test/results/clientnegative/insert_into1.q.out @@ -15,4 +15,4 @@ PREHOOK: type: LOCKTABLE POSTHOOK: query: LOCK TABLE insert_into1_neg SHARED POSTHOOK: type: LOCKTABLE Unable to acquire IMPLICIT, EXCLUSIVE lock default@insert_into1_neg after 5 attempts. -FAILED: Error in acquiring locks: Locks on the underlying objects cannot be acquired, retry after some time. +FAILED: Error in acquiring locks: Locks on the underlying objects cannot be acquired. retry after some time diff --git a/ql/src/test/results/clientnegative/insert_into2.q.out b/ql/src/test/results/clientnegative/insert_into2.q.out index f19d67b88c..b839efeb7d 100644 --- a/ql/src/test/results/clientnegative/insert_into2.q.out +++ b/ql/src/test/results/clientnegative/insert_into2.q.out @@ -15,4 +15,4 @@ PREHOOK: type: LOCKTABLE POSTHOOK: query: LOCK TABLE insert_into1_neg EXCLUSIVE POSTHOOK: type: LOCKTABLE Unable to acquire IMPLICIT, EXCLUSIVE lock default@insert_into1_neg after 5 attempts. -FAILED: Error in acquiring locks: Locks on the underlying objects cannot be acquired, retry after some time. +FAILED: Error in acquiring locks: Locks on the underlying objects cannot be acquired. retry after some time diff --git a/ql/src/test/results/clientnegative/insert_into3.q.out b/ql/src/test/results/clientnegative/insert_into3.q.out index 44be16866d..91aeb5b9a1 100644 --- a/ql/src/test/results/clientnegative/insert_into3.q.out +++ b/ql/src/test/results/clientnegative/insert_into3.q.out @@ -29,4 +29,4 @@ PREHOOK: type: LOCKTABLE POSTHOOK: query: LOCK TABLE insert_into3_neg PARTITION (ds='1') SHARED POSTHOOK: type: LOCKTABLE Unable to acquire IMPLICIT, EXCLUSIVE lock default@insert_into3_neg@ds=1 after 5 attempts. 
-FAILED: Error in acquiring locks: Locks on the underlying objects cannot be acquired, retry after some time. +FAILED: Error in acquiring locks: Locks on the underlying objects cannot be acquired. retry after some time diff --git a/ql/src/test/results/clientnegative/insert_into4.q.out b/ql/src/test/results/clientnegative/insert_into4.q.out index 0cc02ffd6c..303fb6aa2e 100644 --- a/ql/src/test/results/clientnegative/insert_into4.q.out +++ b/ql/src/test/results/clientnegative/insert_into4.q.out @@ -29,4 +29,4 @@ PREHOOK: type: LOCKTABLE POSTHOOK: query: LOCK TABLE insert_into3_neg PARTITION (ds='1') EXCLUSIVE POSTHOOK: type: LOCKTABLE Unable to acquire IMPLICIT, EXCLUSIVE lock default@insert_into3_neg@ds=1 after 5 attempts. -FAILED: Error in acquiring locks: Locks on the underlying objects cannot be acquired, retry after some time. +FAILED: Error in acquiring locks: Locks on the underlying objects cannot be acquired. retry after some time diff --git a/ql/src/test/results/clientnegative/lockneg_try_drop_locked_db.q.out b/ql/src/test/results/clientnegative/lockneg_try_drop_locked_db.q.out index 964b776004..e66965e693 100644 --- a/ql/src/test/results/clientnegative/lockneg_try_drop_locked_db.q.out +++ b/ql/src/test/results/clientnegative/lockneg_try_drop_locked_db.q.out @@ -17,4 +17,4 @@ PREHOOK: type: SHOWLOCKS POSTHOOK: query: show locks database lockneg9 POSTHOOK: type: SHOWLOCKS Unable to acquire IMPLICIT, EXCLUSIVE lock lockneg9 after 1 attempts. -FAILED: Error in acquiring locks: Locks on the underlying objects cannot be acquired, retry after some time. +FAILED: Error in acquiring locks: Locks on the underlying objects cannot be acquired. 
retry after some time diff --git a/ql/src/test/results/clientnegative/update_notnull_constraint.q.out b/ql/src/test/results/clientnegative/update_notnull_constraint.q.out index 86bfc67480..32905378e7 100644 --- a/ql/src/test/results/clientnegative/update_notnull_constraint.q.out +++ b/ql/src/test/results/clientnegative/update_notnull_constraint.q.out @@ -21,4 +21,9 @@ POSTHOOK: Output: default@acid_uami POSTHOOK: Lineage: acid_uami.de SCRIPT [] POSTHOOK: Lineage: acid_uami.i SCRIPT [] POSTHOOK: Lineage: acid_uami.vc SCRIPT [] -FAILED: DataConstraintViolationError org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: Either CHECK or NOT NULL constraint violated! +PREHOOK: query: UPDATE acid_uami set de=null where i=1 +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_uami +PREHOOK: Output: default@acid_uami +#### A masked pattern was here #### +FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask diff --git a/ql/src/test/results/clientpositive/llap/acid_mapjoin.q.out b/ql/src/test/results/clientpositive/acid_mapjoin.q.out similarity index 53% rename from ql/src/test/results/clientpositive/llap/acid_mapjoin.q.out rename to ql/src/test/results/clientpositive/acid_mapjoin.q.out index 6b6a3a1aed..f66e978a01 100644 --- a/ql/src/test/results/clientpositive/llap/acid_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/acid_mapjoin.q.out @@ -71,89 +71,83 @@ POSTHOOK: Input: default@acid1 POSTHOOK: Input: default@acid2 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: acid1 - filterExpr: key is not null 
(type: boolean) - Statistics: Num rows: 316 Data size: 1264 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 316 Data size: 1264 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 316 Data size: 1264 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 8295 Data size: 66360 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 3 - Map Operator Tree: - TableScan - alias: acid2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By 
Operator - aggregations: count(VALUE._col0) - mode: mergepartial + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:acid2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:acid2 + TableScan + alias: acid2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: acid1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 316 Data size: 1264 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 316 Data size: 1264 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 316 Data size: 1264 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 8295 Data size: 66360 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + 
minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/acid_nullscan.q.out b/ql/src/test/results/clientpositive/acid_nullscan.q.out new file mode 100644 index 0000000000..0e5c2418d0 --- /dev/null +++ b/ql/src/test/results/clientpositive/acid_nullscan.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: CREATE TABLE acid_vectorized_n1(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_vectorized_n1 +POSTHOOK: query: CREATE TABLE acid_vectorized_n1(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_vectorized_n1 +PREHOOK: query: insert into table acid_vectorized_n1 select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: 
default@acid_vectorized_n1 +POSTHOOK: query: insert into table acid_vectorized_n1 select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@acid_vectorized_n1 +POSTHOOK: Lineage: acid_vectorized_n1.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: acid_vectorized_n1.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +PREHOOK: query: insert into table acid_vectorized_n1 values (1, 'bar') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@acid_vectorized_n1 +POSTHOOK: query: insert into table acid_vectorized_n1 values (1, 'bar') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@acid_vectorized_n1 +POSTHOOK: Lineage: acid_vectorized_n1.a SCRIPT [] +POSTHOOK: Lineage: acid_vectorized_n1.b SCRIPT [] +PREHOOK: query: explain extended +select sum(a) from acid_vectorized_n1 where false +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_vectorized_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain extended +select sum(a) from acid_vectorized_n1 where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_vectorized_n1 +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT SUM(`a`) AS `$f0` +FROM `default`.`acid_vectorized_n1` +LIMIT 0 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: acid_vectorized_n1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 4 
Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: + nullscan://null/default.acid_vectorized_n1/part_ [$hdt$_0:$hdt$_0:acid_vectorized_n1] + Path -> Partition: + nullscan://null/default.acid_vectorized_n1/part_ + Partition + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"a":"true","b":"true"}} + bucket_count 2 + bucket_field_name a + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.acid_vectorized_n1 + numFiles 3 + numRows 11 + rawDataSize 0 + serialization.ddl struct acid_vectorized_n1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + totalSize 2602 + transactional true + transactional_properties default +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"a":"true","b":"true"}} + bucket_count 2 + bucket_field_name a + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.acid_vectorized_n1 + numFiles 3 + numRows 11 + rawDataSize 0 
+ serialization.ddl struct acid_vectorized_n1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 2602 + transactional true + transactional_properties default +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acid_vectorized_n1 + name: default.acid_vectorized_n1 + Truncated Path -> Alias: + nullscan://null/default.acid_vectorized_n1/part_ [$hdt$_0:$hdt$_0:acid_vectorized_n1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(a) from acid_vectorized_n1 where false +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_vectorized_n1 +#### A masked pattern was here #### +POSTHOOK: query: select sum(a) from acid_vectorized_n1 where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_vectorized_n1 +#### A masked pattern was here #### +NULL diff --git 
a/ql/src/test/results/clientpositive/llap/acid_stats2.q.out b/ql/src/test/results/clientpositive/acid_stats2.q.out similarity index 99% rename from ql/src/test/results/clientpositive/llap/acid_stats2.q.out rename to ql/src/test/results/clientpositive/acid_stats2.q.out index dfacb613e5..45c903fb87 100644 --- a/ql/src/test/results/clientpositive/llap/acid_stats2.q.out +++ b/ql/src/test/results/clientpositive/acid_stats2.q.out @@ -644,7 +644,7 @@ Table Parameters: numPartitions 2 numRows 1 rawDataSize 0 - totalSize 2941 + totalSize 2945 transactional true transactional_properties default #### A masked pattern was here #### @@ -720,7 +720,7 @@ Partition Parameters: numFiles 2 numRows 1 rawDataSize 0 - totalSize 1493 + totalSize 1497 #### A masked pattern was here #### # Storage Information diff --git a/ql/src/test/results/clientpositive/llap/acid_stats3.q.out b/ql/src/test/results/clientpositive/acid_stats3.q.out similarity index 66% rename from ql/src/test/results/clientpositive/llap/acid_stats3.q.out rename to ql/src/test/results/clientpositive/acid_stats3.q.out index 83d52c93cc..f9b0c57522 100644 --- a/ql/src/test/results/clientpositive/llap/acid_stats3.q.out +++ b/ql/src/test/results/clientpositive/acid_stats3.q.out @@ -54,49 +54,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_nonpart - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(key) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - 
sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats_nonpart + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial + aggregations: count(key) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -240,49 +231,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 
<- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_part - Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(key) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats_part + Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial + aggregations: count(key) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: 
mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -358,49 +340,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_part - Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(key) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats_part + Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial + aggregations: count(key) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 
Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/acid_stats4.q.out b/ql/src/test/results/clientpositive/acid_stats4.q.out similarity index 72% rename from ql/src/test/results/clientpositive/llap/acid_stats4.q.out rename to ql/src/test/results/clientpositive/acid_stats4.q.out index 2b6cba45fd..279701d786 100644 --- a/ql/src/test/results/clientpositive/llap/acid_stats4.q.out +++ b/ql/src/test/results/clientpositive/acid_stats4.q.out @@ -56,49 +56,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_nonpart - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key2 (type: int) - outputColumnNames: key2 - 
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(key2) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats_nonpart + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key2 (type: int) + outputColumnNames: key2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial + aggregations: count(key2) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -272,49 +263,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_nonpart2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key2 (type: int) - outputColumnNames: key2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(key2) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats_nonpart2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key2 (type: int) + outputColumnNames: key2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial + aggregations: count(key2) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - 
table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -655,49 +637,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_part - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key2 (type: int) - outputColumnNames: key2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(key2) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats_part + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column 
stats: NONE + Select Operator + expressions: key2 (type: int) + outputColumnNames: key2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial + aggregations: count(key2) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -792,49 +764,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_part - Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - Select Operator - expressions: key2 (type: int) - outputColumnNames: key2 - Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - Group By Operator - aggregations: count(key2) - 
minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col0 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats_part + Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: key2 (type: int) + outputColumnNames: key2 + Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial + aggregations: count(key2) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 
Fetch Operator @@ -890,49 +852,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_part - Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - Select Operator - expressions: key2 (type: int) - outputColumnNames: key2 - Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - Group By Operator - aggregations: count(key2) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col0 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats_part + Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: key2 (type: int) + outputColumnNames: key2 + Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial + aggregations: count(key2) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: 
_col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -968,49 +920,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_part - Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: PARTIAL - Select Operator - expressions: key2 (type: int) - outputColumnNames: key2 - Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: PARTIAL - Group By Operator - aggregations: count(key2) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col0 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats_part + Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: key2 (type: int) + outputColumnNames: key2 + Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial + 
aggregations: count(key2) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/acid_stats5.q.out b/ql/src/test/results/clientpositive/acid_stats5.q.out similarity index 69% rename from ql/src/test/results/clientpositive/llap/acid_stats5.q.out rename to ql/src/test/results/clientpositive/acid_stats5.q.out index 8a891eb027..4ae023fc4e 100644 --- a/ql/src/test/results/clientpositive/llap/acid_stats5.q.out +++ b/ql/src/test/results/clientpositive/acid_stats5.q.out @@ -140,47 +140,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats2 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - 
Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats2 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -198,74 +189,74 @@ POSTHOOK: Input: default@stats2 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats2 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: int) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats2 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial + keys: key (type: int) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: 
COMPLETE - Group By Operator - aggregations: count(_col0) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + 
sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -369,49 +360,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(key) - minReductionHashAggr: 0.6666666 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + 
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial + aggregations: min(key) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -611,74 +593,74 @@ POSTHOOK: Input: default@stats2 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key 
(type: int) - outputColumnNames: key - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: int) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial + keys: key (type: int) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col0) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - 
table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -782,49 +764,40 @@ STAGE 
DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(key) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: stats2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial + aggregations: min(key) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: int) + Execution mode: vectorized 
+ Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/acid_table_stats.q.out b/ql/src/test/results/clientpositive/acid_table_stats.q.out similarity index 92% rename from ql/src/test/results/clientpositive/llap/acid_table_stats.q.out rename to ql/src/test/results/clientpositive/acid_table_stats.q.out index 4daf7e3468..c4bc80ee12 100644 --- a/ql/src/test/results/clientpositive/llap/acid_table_stats.q.out +++ b/ql/src/test/results/clientpositive/acid_table_stats.q.out @@ -97,7 +97,7 @@ Partition Parameters: numFiles 2 numRows 1000 rawDataSize 0 - totalSize 4361 + totalSize 4384 #### A masked pattern was here #### # Storage Information @@ -184,7 +184,7 @@ Partition Parameters: numFiles 2 numRows 1000 rawDataSize 0 - totalSize 4361 + totalSize 4384 #### A masked pattern was here #### # Storage Information @@ -235,7 +235,7 @@ Partition Parameters: numFiles 2 numRows 1000 rawDataSize 0 - totalSize 4361 + totalSize 4384 #### A masked pattern was here #### # Storage Information @@ -331,7 +331,7 @@ Partition Parameters: numFiles 4 numRows 2000 rawDataSize 0 - totalSize 8722 + totalSize 8769 #### A masked pattern was here #### # Storage Information @@ -380,7 +380,7 @@ Partition Parameters: numFiles 4 numRows 2000 rawDataSize 0 - totalSize 8722 + totalSize 8769 #### A masked pattern was here #### # Storage Information @@ -450,50 +450,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked 
pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: acid - filterExpr: (ds = '2008-04-08') (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: max(key) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: acid + filterExpr: (ds = '2008-04-08') (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial + aggregations: max(key) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -602,7 +593,7 @@ Partition Parameters: numFiles 2 numRows 1000 rawDataSize 176000 - totalSize 3325 + totalSize 3326 #### A masked pattern was here #### # Storage Information diff --git a/ql/src/test/results/clientpositive/llap/acid_view_delete.q.out b/ql/src/test/results/clientpositive/acid_view_delete.q.out similarity index 98% rename from ql/src/test/results/clientpositive/llap/acid_view_delete.q.out rename to ql/src/test/results/clientpositive/acid_view_delete.q.out index 89ea298dca..a1de850a17 100644 --- a/ql/src/test/results/clientpositive/llap/acid_view_delete.q.out +++ b/ql/src/test/results/clientpositive/acid_view_delete.q.out @@ -94,7 +94,7 @@ POSTHOOK: Input: default@mydim #### A masked pattern was here #### 1 bob 95136 true 2 joe 70068 true -Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: update mydim set is_current = false where mydim.key not in(select kv from updates_staging_view) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/alias_casted_column.q.out b/ql/src/test/results/clientpositive/alias_casted_column.q.out new file mode 100644 index 
0000000000..7a6620c799 --- /dev/null +++ b/ql/src/test/results/clientpositive/alias_casted_column.q.out @@ -0,0 +1,76 @@ +PREHOOK: query: explain select key from (select cast(key as int) from src )t +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select key from (select cast(key as int) from src )t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select key2 from (select cast(key as int) key2 from src )t +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select key2 from (select cast(key as int) key2 from src )t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 
UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/allcolref_in_udf.q.out b/ql/src/test/results/clientpositive/allcolref_in_udf.q.out new file mode 100644 index 0000000000..87713544f5 --- /dev/null +++ b/ql/src/test/results/clientpositive/allcolref_in_udf.q.out @@ -0,0 +1,258 @@ +PREHOOK: query: explain +select concat(*),array(*) from src where key < 100 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select concat(*),array(*) from src where key < 100 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: concat(key, value) (type: string), array(key,value) (type: array) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 349264 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data 
size: 21040 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 21040 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select concat(*),array(*) from src where key < 100 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select concat(*),array(*) from src where key < 100 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +86val_86 ["86","val_86"] +27val_27 ["27","val_27"] +98val_98 ["98","val_98"] +66val_66 ["66","val_66"] +37val_37 ["37","val_37"] +15val_15 ["15","val_15"] +82val_82 ["82","val_82"] +17val_17 ["17","val_17"] +0val_0 ["0","val_0"] +57val_57 ["57","val_57"] +PREHOOK: query: explain +select stack(2, *) as (e1,e2,e3) from ( + select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*) + from src a join src b on a.key+1=b.key where a.key < 100) x limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select stack(2, *) as (e1,e2,e3) from ( + select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*) + from src a join src b on a.key+1=b.key where a.key < 100) x limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 
Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), concat(key, value) (type: string), (UDFToDouble(key) + 1.0D) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: double) + Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + TableScan + alias: b + filterExpr: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), concat(key, value) (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: double) + Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: double) + 1 _col3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 + Statistics: Num 
rows: 262 Data size: 189688 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 2 (type: int), concat(_col0, _col1, _col4, _col5) (type: string), _col2 (type: string), _col6 (type: string), concat(_col0, _col1, _col4) (type: string), concat(_col0, _col4, _col5) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 262 Data size: 242088 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 262 Data size: 242088 Basic stats: COMPLETE Column stats: COMPLETE + function name: stack + Select Operator + expressions: col0 (type: string), col1 (type: string), col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 262 Data size: 2096 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select stack(2, *) as (e1,e2,e3) from ( + select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*) + from src a join src b on a.key+1=b.key where a.key < 100) x limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select stack(2, *) as (e1,e2,e3) from ( + select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*) + from src a join src b on a.key+1=b.key where a.key < 100) x limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4val_45val_5 4val_4 5val_5 +4val_45 NULL 5val_5 
+4val_45val_5 4val_4 5val_5 +4val_45 NULL 5val_5 +4val_45val_5 4val_4 5val_5 +4val_45 NULL 5val_5 +8val_89val_9 8val_8 9val_9 +8val_89 NULL 9val_9 +9val_910val_10 9val_9 10val_10 +9val_910 NULL 10val_10 +PREHOOK: query: create table allcolref as select array(key, value) from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@allcolref +POSTHOOK: query: create table allcolref as select array(key, value) from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@allcolref +POSTHOOK: Lineage: allcolref._c0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain select explode(*) as x from allcolref limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@allcolref +#### A masked pattern was here #### +POSTHOOK: query: explain select explode(*) as x from allcolref limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@allcolref +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: allcolref + Statistics: Num rows: 500 Data size: 913920 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _c0 (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 913920 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 500 Data size: 913920 Basic stats: COMPLETE Column stats: NONE + function name: explode + Select Operator + expressions: col (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 913920 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 18270 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 18270 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select explode(*) as x from allcolref limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@allcolref +#### A masked pattern was here #### +POSTHOOK: query: select explode(*) as x from allcolref limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@allcolref +#### A masked pattern was here #### +238 +val_238 +86 +val_86 +311 +val_311 +27 +val_27 +165 +val_165 diff --git a/ql/src/test/results/clientpositive/llap/alterColumnStatsPart.q.out b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out similarity index 69% rename from ql/src/test/results/clientpositive/llap/alterColumnStatsPart.q.out rename to ql/src/test/results/clientpositive/alterColumnStatsPart.q.out index a62f9ff086..fe17d00211 100644 --- a/ql/src/test/results/clientpositive/llap/alterColumnStatsPart.q.out +++ b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out @@ -131,50 +131,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p - filterExpr: (c = 1) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int) - outputColumnNames: a - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(a) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: p + filterExpr: (c = 1) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial + aggregations: max(a) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -287,50 +278,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked 
pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p - filterExpr: (c = 4) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int) - outputColumnNames: a - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(a) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: p + filterExpr: (c = 4) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial + aggregations: max(a) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By 
Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -398,50 +380,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p - filterExpr: (c = 100) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: a (type: int) - outputColumnNames: a - Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: max(a) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: p + filterExpr: (c = 100) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial + aggregations: max(a) + 
minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -479,50 +452,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p - filterExpr: (c = 100) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: a (type: int) - outputColumnNames: a - Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: max(a) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: 
PARTIAL Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: p + filterExpr: (c = 100) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial + aggregations: max(a) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/alter_change_db_location.q.out b/ql/src/test/results/clientpositive/alter_change_db_location.q.out similarity index 100% rename from 
ql/src/test/results/clientpositive/llap/alter_change_db_location.q.out rename to ql/src/test/results/clientpositive/alter_change_db_location.q.out diff --git a/ql/src/test/results/clientpositive/llap/alter_db_owner.q.out b/ql/src/test/results/clientpositive/alter_db_owner.q.out similarity index 100% rename from ql/src/test/results/clientpositive/llap/alter_db_owner.q.out rename to ql/src/test/results/clientpositive/alter_db_owner.q.out diff --git a/ql/src/test/results/clientpositive/llap/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out similarity index 97% rename from ql/src/test/results/clientpositive/llap/alter_partition_coltype.q.out rename to ql/src/test/results/clientpositive/alter_partition_coltype.q.out index afffbd2862..183cc4f8be 100644 --- a/ql/src/test/results/clientpositive/llap/alter_partition_coltype.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -354,10 +354,12 @@ STAGE PLANS: TableScan alias: alter_coltype filterExpr: dt is not null (type: boolean) + Statistics: Num rows: 50 Data size: 18350 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: key (type: string), value (type: string), dt (type: string), ts (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 50 Data size: 18350 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select count(*) from alter_coltype where ts = 3.0 @@ -551,10 +553,12 @@ STAGE PLANS: TableScan alias: alterdynamic_part_table filterExpr: ((partcol1 = 1) and (partcol2 = '1')) (type: boolean) + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: intcol (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select intcol from pt.alterdynamic_part_table where 
(partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__') @@ -629,10 +633,12 @@ STAGE PLANS: TableScan alias: alterdynamic_part_table filterExpr: (struct(partcol1,partcol2)) IN (const struct(2,'1'), const struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean) + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: intcol (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__') diff --git a/ql/src/test/results/clientpositive/llap/ambiguitycheck.q.out b/ql/src/test/results/clientpositive/ambiguitycheck.q.out similarity index 98% rename from ql/src/test/results/clientpositive/llap/ambiguitycheck.q.out rename to ql/src/test/results/clientpositive/ambiguitycheck.q.out index 5f15e50cf6..3de3d68118 100644 --- a/ql/src/test/results/clientpositive/llap/ambiguitycheck.q.out +++ b/ql/src/test/results/clientpositive/ambiguitycheck.q.out @@ -63,6 +63,7 @@ GROUPING SETS ((tab1.key, tab1.value)) POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +0 val_0 3 10 val_10 1 100 val_100 2 103 val_103 2 @@ -70,34 +71,58 @@ POSTHOOK: Input: default@src 105 val_105 1 11 val_11 1 111 val_111 1 +113 val_113 2 114 val_114 1 116 val_116 1 118 val_118 2 +119 val_119 3 +12 val_12 2 +120 val_120 2 125 val_125 2 126 val_126 1 +128 val_128 3 +129 val_129 2 131 val_131 1 133 val_133 1 134 val_134 2 +136 val_136 1 +137 val_137 2 +138 val_138 4 143 val_143 1 +145 val_145 1 +146 val_146 2 +149 val_149 2 15 val_15 2 150 val_150 1 152 val_152 2 +153 val_153 1 155 val_155 1 +156 val_156 1 157 val_157 1 +158 val_158 1 +160 val_160 1 +162 val_162 1 163 val_163 1 164 val_164 2 +165 val_165 2 +166 val_166 1 167 val_167 3 168 val_168 
1 169 val_169 4 17 val_17 1 170 val_170 1 +172 val_172 2 174 val_174 2 175 val_175 2 176 val_176 2 177 val_177 1 +178 val_178 1 +179 val_179 2 18 val_18 2 180 val_180 1 +181 val_181 1 183 val_183 1 +186 val_186 1 187 val_187 3 189 val_189 1 19 val_19 1 @@ -105,7 +130,9 @@ POSTHOOK: Input: default@src 191 val_191 2 192 val_192 1 193 val_193 3 +194 val_194 1 195 val_195 2 +196 val_196 1 197 val_197 2 199 val_199 3 2 val_2 1 @@ -117,250 +144,221 @@ POSTHOOK: Input: default@src 205 val_205 2 207 val_207 2 208 val_208 3 +209 val_209 2 213 val_213 2 214 val_214 1 216 val_216 2 217 val_217 2 +218 val_218 1 +219 val_219 2 221 val_221 2 +222 val_222 1 +223 val_223 2 +224 val_224 2 +226 val_226 1 +228 val_228 1 229 val_229 2 230 val_230 5 +233 val_233 2 +235 val_235 1 237 val_237 2 238 val_238 2 239 val_239 2 24 val_24 2 241 val_241 1 +242 val_242 2 244 val_244 1 +247 val_247 1 248 val_248 1 +249 val_249 1 252 val_252 1 +255 val_255 2 +256 val_256 2 +257 val_257 1 258 val_258 1 26 val_26 2 +260 val_260 1 +262 val_262 1 263 val_263 1 +265 val_265 2 +266 val_266 1 27 val_27 1 272 val_272 2 273 val_273 3 274 val_274 1 +275 val_275 1 +277 val_277 4 278 val_278 2 +28 val_28 1 +280 val_280 2 281 val_281 2 282 val_282 2 283 val_283 1 +284 val_284 1 285 val_285 1 286 val_286 1 287 val_287 1 288 val_288 2 +289 val_289 1 291 val_291 1 +292 val_292 1 +296 val_296 1 298 val_298 3 30 val_30 1 302 val_302 1 305 val_305 1 306 val_306 1 307 val_307 2 +308 val_308 1 309 val_309 2 +310 val_310 1 +311 val_311 3 315 val_315 1 316 val_316 3 +317 val_317 2 +318 val_318 3 321 val_321 2 322 val_322 2 323 val_323 1 325 val_325 2 +327 val_327 3 33 val_33 1 +331 val_331 2 332 val_332 1 333 val_333 2 335 val_335 1 336 val_336 1 338 val_338 1 +339 val_339 1 34 val_34 1 +341 val_341 1 +342 val_342 2 344 val_344 2 +345 val_345 1 348 val_348 5 35 val_35 3 +351 val_351 1 353 val_353 2 +356 val_356 1 360 val_360 1 362 val_362 1 +364 val_364 1 +365 val_365 1 366 val_366 1 367 val_367 2 +368 val_368 1 +369 
val_369 3 +37 val_37 2 373 val_373 1 +374 val_374 1 +375 val_375 1 +377 val_377 1 +378 val_378 1 379 val_379 1 +382 val_382 2 +384 val_384 3 386 val_386 1 +389 val_389 1 +392 val_392 1 +393 val_393 1 394 val_394 1 +395 val_395 2 +396 val_396 3 +397 val_397 2 399 val_399 2 4 val_4 1 +400 val_400 1 401 val_401 5 402 val_402 1 +403 val_403 3 404 val_404 2 406 val_406 4 +407 val_407 1 409 val_409 3 41 val_41 1 411 val_411 1 413 val_413 2 +414 val_414 2 +417 val_417 3 418 val_418 1 419 val_419 1 +42 val_42 2 421 val_421 1 424 val_424 2 427 val_427 1 429 val_429 2 +43 val_43 1 +430 val_430 3 431 val_431 3 432 val_432 1 435 val_435 1 436 val_436 1 437 val_437 1 438 val_438 3 -444 val_444 1 -452 val_452 1 -453 val_453 1 -455 val_455 1 -457 val_457 1 -459 val_459 2 -463 val_463 2 -466 val_466 3 -47 val_47 1 -472 val_472 1 -475 val_475 1 -478 val_478 2 -479 val_479 1 -482 val_482 1 -483 val_483 1 -484 val_484 1 -492 val_492 2 -494 val_494 1 -498 val_498 3 -5 val_5 3 -54 val_54 1 -57 val_57 1 -65 val_65 1 -69 val_69 1 -72 val_72 2 -76 val_76 2 -78 val_78 1 -8 val_8 1 -80 val_80 1 -90 val_90 3 -98 val_98 2 -0 val_0 3 -113 val_113 2 -119 val_119 3 -12 val_12 2 -120 val_120 2 -128 val_128 3 -129 val_129 2 -136 val_136 1 -137 val_137 2 -138 val_138 4 -145 val_145 1 -146 val_146 2 -149 val_149 2 -153 val_153 1 -156 val_156 1 -158 val_158 1 -160 val_160 1 -162 val_162 1 -165 val_165 2 -166 val_166 1 -172 val_172 2 -178 val_178 1 -179 val_179 2 -181 val_181 1 -186 val_186 1 -194 val_194 1 -196 val_196 1 -209 val_209 2 -218 val_218 1 -219 val_219 2 -222 val_222 1 -223 val_223 2 -224 val_224 2 -226 val_226 1 -228 val_228 1 -233 val_233 2 -235 val_235 1 -242 val_242 2 -247 val_247 1 -249 val_249 1 -255 val_255 2 -256 val_256 2 -257 val_257 1 -260 val_260 1 -262 val_262 1 -265 val_265 2 -266 val_266 1 -275 val_275 1 -277 val_277 4 -28 val_28 1 -280 val_280 2 -284 val_284 1 -289 val_289 1 -292 val_292 1 -296 val_296 1 -308 val_308 1 -310 val_310 1 -311 val_311 3 -317 val_317 2 -318 
val_318 3 -327 val_327 3 -331 val_331 2 -339 val_339 1 -341 val_341 1 -342 val_342 2 -345 val_345 1 -351 val_351 1 -356 val_356 1 -364 val_364 1 -365 val_365 1 -368 val_368 1 -369 val_369 3 -37 val_37 2 -374 val_374 1 -375 val_375 1 -377 val_377 1 -378 val_378 1 -382 val_382 2 -384 val_384 3 -389 val_389 1 -392 val_392 1 -393 val_393 1 -395 val_395 2 -396 val_396 3 -397 val_397 2 -400 val_400 1 -403 val_403 3 -407 val_407 1 -414 val_414 2 -417 val_417 3 -42 val_42 2 -43 val_43 1 -430 val_430 3 439 val_439 2 44 val_44 1 443 val_443 1 +444 val_444 1 446 val_446 1 448 val_448 1 449 val_449 1 +452 val_452 1 +453 val_453 1 454 val_454 3 +455 val_455 1 +457 val_457 1 458 val_458 2 +459 val_459 2 460 val_460 1 462 val_462 2 +463 val_463 2 +466 val_466 3 467 val_467 1 468 val_468 4 469 val_469 5 +47 val_47 1 470 val_470 1 +472 val_472 1 +475 val_475 1 477 val_477 1 +478 val_478 2 +479 val_479 1 480 val_480 3 481 val_481 1 +482 val_482 1 +483 val_483 1 +484 val_484 1 485 val_485 1 487 val_487 1 489 val_489 4 490 val_490 1 491 val_491 1 +492 val_492 2 493 val_493 1 +494 val_494 1 495 val_495 1 496 val_496 1 497 val_497 1 +498 val_498 3 +5 val_5 3 51 val_51 2 53 val_53 1 +54 val_54 1 +57 val_57 1 58 val_58 2 64 val_64 1 +65 val_65 1 66 val_66 1 67 val_67 2 +69 val_69 1 70 val_70 3 +72 val_72 2 74 val_74 1 +76 val_76 2 77 val_77 1 +78 val_78 1 +8 val_8 1 +80 val_80 1 82 val_82 1 83 val_83 2 84 val_84 2 @@ -368,10 +366,12 @@ POSTHOOK: Input: default@src 86 val_86 1 87 val_87 1 9 val_9 1 +90 val_90 3 92 val_92 1 95 val_95 2 96 val_96 1 97 val_97 2 +98 val_98 2 PREHOOK: query: SELECT key, src.value, SUM(1) @@ -392,6 +392,7 @@ GROUPING SETS ((key, src.value)) POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +0 val_0 3 10 val_10 1 100 val_100 2 103 val_103 2 @@ -399,34 +400,58 @@ POSTHOOK: Input: default@src 105 val_105 1 11 val_11 1 111 val_111 1 +113 val_113 2 114 val_114 1 116 val_116 1 118 val_118 2 +119 val_119 3 +12 val_12 2 +120 val_120 
2 125 val_125 2 126 val_126 1 +128 val_128 3 +129 val_129 2 131 val_131 1 133 val_133 1 134 val_134 2 +136 val_136 1 +137 val_137 2 +138 val_138 4 143 val_143 1 +145 val_145 1 +146 val_146 2 +149 val_149 2 15 val_15 2 150 val_150 1 152 val_152 2 +153 val_153 1 155 val_155 1 +156 val_156 1 157 val_157 1 +158 val_158 1 +160 val_160 1 +162 val_162 1 163 val_163 1 164 val_164 2 +165 val_165 2 +166 val_166 1 167 val_167 3 168 val_168 1 169 val_169 4 17 val_17 1 170 val_170 1 +172 val_172 2 174 val_174 2 175 val_175 2 176 val_176 2 177 val_177 1 +178 val_178 1 +179 val_179 2 18 val_18 2 180 val_180 1 +181 val_181 1 183 val_183 1 +186 val_186 1 187 val_187 3 189 val_189 1 19 val_19 1 @@ -434,7 +459,9 @@ POSTHOOK: Input: default@src 191 val_191 2 192 val_192 1 193 val_193 3 +194 val_194 1 195 val_195 2 +196 val_196 1 197 val_197 2 199 val_199 3 2 val_2 1 @@ -446,250 +473,221 @@ POSTHOOK: Input: default@src 205 val_205 2 207 val_207 2 208 val_208 3 +209 val_209 2 213 val_213 2 214 val_214 1 216 val_216 2 217 val_217 2 +218 val_218 1 +219 val_219 2 221 val_221 2 +222 val_222 1 +223 val_223 2 +224 val_224 2 +226 val_226 1 +228 val_228 1 229 val_229 2 230 val_230 5 +233 val_233 2 +235 val_235 1 237 val_237 2 238 val_238 2 239 val_239 2 24 val_24 2 241 val_241 1 +242 val_242 2 244 val_244 1 +247 val_247 1 248 val_248 1 +249 val_249 1 252 val_252 1 +255 val_255 2 +256 val_256 2 +257 val_257 1 258 val_258 1 26 val_26 2 +260 val_260 1 +262 val_262 1 263 val_263 1 +265 val_265 2 +266 val_266 1 27 val_27 1 272 val_272 2 273 val_273 3 274 val_274 1 +275 val_275 1 +277 val_277 4 278 val_278 2 +28 val_28 1 +280 val_280 2 281 val_281 2 282 val_282 2 283 val_283 1 +284 val_284 1 285 val_285 1 286 val_286 1 287 val_287 1 288 val_288 2 +289 val_289 1 291 val_291 1 +292 val_292 1 +296 val_296 1 298 val_298 3 30 val_30 1 302 val_302 1 305 val_305 1 306 val_306 1 307 val_307 2 +308 val_308 1 309 val_309 2 +310 val_310 1 +311 val_311 3 315 val_315 1 316 val_316 3 +317 val_317 2 +318 val_318 3 
321 val_321 2 322 val_322 2 323 val_323 1 325 val_325 2 +327 val_327 3 33 val_33 1 +331 val_331 2 332 val_332 1 333 val_333 2 335 val_335 1 336 val_336 1 338 val_338 1 +339 val_339 1 34 val_34 1 +341 val_341 1 +342 val_342 2 344 val_344 2 +345 val_345 1 348 val_348 5 35 val_35 3 +351 val_351 1 353 val_353 2 +356 val_356 1 360 val_360 1 362 val_362 1 +364 val_364 1 +365 val_365 1 366 val_366 1 367 val_367 2 +368 val_368 1 +369 val_369 3 +37 val_37 2 373 val_373 1 +374 val_374 1 +375 val_375 1 +377 val_377 1 +378 val_378 1 379 val_379 1 +382 val_382 2 +384 val_384 3 386 val_386 1 +389 val_389 1 +392 val_392 1 +393 val_393 1 394 val_394 1 +395 val_395 2 +396 val_396 3 +397 val_397 2 399 val_399 2 4 val_4 1 +400 val_400 1 401 val_401 5 402 val_402 1 +403 val_403 3 404 val_404 2 406 val_406 4 +407 val_407 1 409 val_409 3 41 val_41 1 411 val_411 1 413 val_413 2 +414 val_414 2 +417 val_417 3 418 val_418 1 419 val_419 1 +42 val_42 2 421 val_421 1 424 val_424 2 427 val_427 1 429 val_429 2 +43 val_43 1 +430 val_430 3 431 val_431 3 432 val_432 1 435 val_435 1 436 val_436 1 437 val_437 1 438 val_438 3 -444 val_444 1 -452 val_452 1 -453 val_453 1 -455 val_455 1 -457 val_457 1 -459 val_459 2 -463 val_463 2 -466 val_466 3 -47 val_47 1 -472 val_472 1 -475 val_475 1 -478 val_478 2 -479 val_479 1 -482 val_482 1 -483 val_483 1 -484 val_484 1 -492 val_492 2 -494 val_494 1 -498 val_498 3 -5 val_5 3 -54 val_54 1 -57 val_57 1 -65 val_65 1 -69 val_69 1 -72 val_72 2 -76 val_76 2 -78 val_78 1 -8 val_8 1 -80 val_80 1 -90 val_90 3 -98 val_98 2 -0 val_0 3 -113 val_113 2 -119 val_119 3 -12 val_12 2 -120 val_120 2 -128 val_128 3 -129 val_129 2 -136 val_136 1 -137 val_137 2 -138 val_138 4 -145 val_145 1 -146 val_146 2 -149 val_149 2 -153 val_153 1 -156 val_156 1 -158 val_158 1 -160 val_160 1 -162 val_162 1 -165 val_165 2 -166 val_166 1 -172 val_172 2 -178 val_178 1 -179 val_179 2 -181 val_181 1 -186 val_186 1 -194 val_194 1 -196 val_196 1 -209 val_209 2 -218 val_218 1 -219 val_219 2 -222 val_222 
1 -223 val_223 2 -224 val_224 2 -226 val_226 1 -228 val_228 1 -233 val_233 2 -235 val_235 1 -242 val_242 2 -247 val_247 1 -249 val_249 1 -255 val_255 2 -256 val_256 2 -257 val_257 1 -260 val_260 1 -262 val_262 1 -265 val_265 2 -266 val_266 1 -275 val_275 1 -277 val_277 4 -28 val_28 1 -280 val_280 2 -284 val_284 1 -289 val_289 1 -292 val_292 1 -296 val_296 1 -308 val_308 1 -310 val_310 1 -311 val_311 3 -317 val_317 2 -318 val_318 3 -327 val_327 3 -331 val_331 2 -339 val_339 1 -341 val_341 1 -342 val_342 2 -345 val_345 1 -351 val_351 1 -356 val_356 1 -364 val_364 1 -365 val_365 1 -368 val_368 1 -369 val_369 3 -37 val_37 2 -374 val_374 1 -375 val_375 1 -377 val_377 1 -378 val_378 1 -382 val_382 2 -384 val_384 3 -389 val_389 1 -392 val_392 1 -393 val_393 1 -395 val_395 2 -396 val_396 3 -397 val_397 2 -400 val_400 1 -403 val_403 3 -407 val_407 1 -414 val_414 2 -417 val_417 3 -42 val_42 2 -43 val_43 1 -430 val_430 3 439 val_439 2 44 val_44 1 443 val_443 1 +444 val_444 1 446 val_446 1 448 val_448 1 449 val_449 1 +452 val_452 1 +453 val_453 1 454 val_454 3 +455 val_455 1 +457 val_457 1 458 val_458 2 +459 val_459 2 460 val_460 1 462 val_462 2 +463 val_463 2 +466 val_466 3 467 val_467 1 468 val_468 4 469 val_469 5 +47 val_47 1 470 val_470 1 +472 val_472 1 +475 val_475 1 477 val_477 1 +478 val_478 2 +479 val_479 1 480 val_480 3 481 val_481 1 +482 val_482 1 +483 val_483 1 +484 val_484 1 485 val_485 1 487 val_487 1 489 val_489 4 490 val_490 1 491 val_491 1 +492 val_492 2 493 val_493 1 +494 val_494 1 495 val_495 1 496 val_496 1 497 val_497 1 +498 val_498 3 +5 val_5 3 51 val_51 2 53 val_53 1 +54 val_54 1 +57 val_57 1 58 val_58 2 64 val_64 1 +65 val_65 1 66 val_66 1 67 val_67 2 +69 val_69 1 70 val_70 3 +72 val_72 2 74 val_74 1 +76 val_76 2 77 val_77 1 +78 val_78 1 +8 val_8 1 +80 val_80 1 82 val_82 1 83 val_83 2 84 val_84 2 @@ -697,10 +695,12 @@ POSTHOOK: Input: default@src 86 val_86 1 87 val_87 1 9 val_9 1 +90 val_90 3 92 val_92 1 95 val_95 2 96 val_96 1 97 val_97 2 +98 val_98 2 
PREHOOK: query: explain extended select int(1.2) from src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -722,12 +722,15 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: 1 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select int(1.2) from src limit 1 diff --git a/ql/src/test/results/clientpositive/ambiguous_col.q.out b/ql/src/test/results/clientpositive/ambiguous_col.q.out new file mode 100644 index 0000000000..bcc36a936b --- /dev/null +++ b/ql/src/test/results/clientpositive/ambiguous_col.q.out @@ -0,0 +1,406 @@ +PREHOOK: query: explain select * from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 
Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 39 Data size: 6942 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 39 Data size: 6942 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 39 Data size: 6942 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * 
from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 128 val_128 +128 128 val_128 +128 128 val_128 +146 146 val_146 +146 146 val_146 +150 150 val_150 +213 213 val_213 +213 213 val_213 +224 224 val_224 +224 224 val_224 +238 238 val_238 +238 238 val_238 +255 255 val_255 +255 255 val_255 +273 273 val_273 +273 273 val_273 +273 273 val_273 +278 278 val_278 +278 278 val_278 +311 311 val_311 +311 311 val_311 +311 311 val_311 +369 369 val_369 +369 369 val_369 +369 369 val_369 +401 401 val_401 +401 401 val_401 +401 401 val_401 +401 401 val_401 +401 401 val_401 +406 406 val_406 +406 406 val_406 +406 406 val_406 +406 406 val_406 +66 66 val_66 +98 98 val_98 +98 98 val_98 +PREHOOK: query: explain select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: 
Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+ + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 128 +128 128 +128 128 +146 146 +146 146 +150 150 +213 213 +213 213 +224 224 +224 224 +238 238 +238 238 +255 255 +255 255 +273 273 +273 273 +273 273 +278 278 +278 278 +311 311 +311 311 +311 311 +369 369 +369 369 +369 369 +401 401 +401 401 +401 401 +401 401 +401 401 +406 406 +406 406 +406 406 +406 406 +66 66 +98 98 +98 98 +PREHOOK: query: explain select * from (select a.key, a.key from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from (select a.key, a.key from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 
Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 39 Data size: 6786 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 39 Data size: 6786 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from (select a.key, a.key from (select * from src) a join (select * from src1) 
b on (a.key = b.key)) t +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key, a.key from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 128 +128 128 +128 128 +146 146 +146 146 +150 150 +213 213 +213 213 +224 224 +224 224 +238 238 +238 238 +255 255 +255 255 +273 273 +273 273 +273 273 +278 278 +278 278 +311 311 +311 311 +311 311 +369 369 +369 369 +369 369 +401 401 +401 401 +401 401 +401 401 +401 401 +406 406 +406 406 +406 406 +406 406 +66 66 +98 98 +98 98 +PREHOOK: query: explain select count(*) from (select key, key from src) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select count(*) from (select key, key from src) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from (select key, key from src) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from (select key, key from src) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +500 diff --git a/ql/src/test/results/clientpositive/llap/analyze_table_null_partition.q.out b/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out similarity index 98% rename from ql/src/test/results/clientpositive/llap/analyze_table_null_partition.q.out rename to ql/src/test/results/clientpositive/analyze_table_null_partition.q.out index 3206980bd1..ab05bd9f1a 100644 --- a/ql/src/test/results/clientpositive/llap/analyze_table_null_partition.q.out +++ 
b/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out @@ -296,10 +296,12 @@ STAGE PLANS: Processor Tree: TableScan alias: test2_n6 + Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: name (type: string), age (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: DROP TABLE test1_n9 diff --git a/ql/src/test/results/clientpositive/llap/analyze_tbl_date.q.out b/ql/src/test/results/clientpositive/analyze_tbl_date.q.out similarity index 74% rename from ql/src/test/results/clientpositive/llap/analyze_tbl_date.q.out rename to ql/src/test/results/clientpositive/analyze_tbl_date.q.out index 5a527ca6f5..9ce5f589ce 100644 --- a/ql/src/test/results/clientpositive/llap/analyze_tbl_date.q.out +++ b/ql/src/test/results/clientpositive/analyze_tbl_date.q.out @@ -73,31 +73,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: test_table_n7 - filterExpr: d is not null (type: boolean) + Map Reduce + Map Operator Tree: + TableScan + alias: test_table_n7 + filterExpr: d is not null (type: boolean) + Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: d is not null (type: boolean) + Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: d is not null (type: boolean) - Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d 
(type: date) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out new file mode 100644 index 0000000000..e721eacdcc --- /dev/null +++ b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out @@ -0,0 +1,262 @@ +PREHOOK: query: create table over1k_n4( +t tinyint, +si smallint, +i int, +b bigint, +f float, +d double, +bo boolean, +s string, +ts timestamp, +`dec` decimal(4,2), +bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_n4 +POSTHOOK: query: create table over1k_n4( +t tinyint, +si smallint, +i int, +b bigint, +f float, +d double, +bo boolean, +s string, +ts timestamp, +`dec` decimal(4,2), +bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_n4 +PREHOOK: query: load data local inpath '../../data/files/over1k' overwrite into table over1k_n4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over1k_n4 +POSTHOOK: 
query: load data local inpath '../../data/files/over1k' overwrite into table over1k_n4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over1k_n4 +PREHOOK: query: load data local inpath '../../data/files/over1k' into table over1k_n4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over1k_n4 +POSTHOOK: query: load data local inpath '../../data/files/over1k' into table over1k_n4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over1k_n4 +PREHOOK: query: analyze table over1k_n4 compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n4 +PREHOOK: Output: default@over1k_n4 +POSTHOOK: query: analyze table over1k_n4 compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n4 +POSTHOOK: Output: default@over1k_n4 +PREHOOK: query: analyze table over1k_n4 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@over1k_n4 +PREHOOK: Output: default@over1k_n4 +#### A masked pattern was here #### +POSTHOOK: query: analyze table over1k_n4 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@over1k_n4 +POSTHOOK: Output: default@over1k_n4 +#### A masked pattern was here #### +PREHOOK: query: explain select count(*) from over1k_n4 where ( +(t=1 and si=2) +or (t=2 and si=3) +or (t=3 and si=4) +or (t=4 and si=5) +or (t=5 and si=6) +or (t=6 and si=7) +or (t=7 and si=8) +or (t=9 and si=10) +or (t=10 and si=11) +or (t=11 and si=12) +or (t=12 and si=13) +or (t=13 and si=14) +or (t=14 and si=15) +or (t=15 and si=16) +or (t=16 and si=17) +or (t=17 and si=18) +or (t=27 and si=28) +or (t=37 and si=38) +or (t=47 and si=48) +or (t=52 and si=53)) +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n4 +#### A masked pattern was here #### +POSTHOOK: query: explain select count(*) from over1k_n4 where ( +(t=1 and si=2) +or (t=2 and si=3) +or (t=3 and si=4) +or (t=4 and si=5) +or (t=5 and si=6) 
+or (t=6 and si=7) +or (t=7 and si=8) +or (t=9 and si=10) +or (t=10 and si=11) +or (t=11 and si=12) +or (t=12 and si=13) +or (t=13 and si=14) +or (t=14 and si=15) +or (t=15 and si=16) +or (t=16 and si=17) +or (t=17 and si=18) +or (t=27 and si=28) +or (t=37 and si=38) +or (t=47 and si=48) +or (t=52 and si=53)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n4 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k_n4 + filterExpr: (((t = 1Y) and (si = 2S)) or ((t = 2Y) and (si = 3S)) or ((t = 3Y) and (si = 4S)) or ((t = 4Y) and (si = 5S)) or ((t = 5Y) and (si = 6S)) or ((t = 6Y) and (si = 7S)) or ((t = 7Y) and (si = 8S)) or ((t = 9Y) and (si = 10S)) or ((t = 10Y) and (si = 11S)) or ((t = 11Y) and (si = 12S)) or ((t = 12Y) and (si = 13S)) or ((t = 13Y) and (si = 14S)) or ((t = 14Y) and (si = 15S)) or ((t = 15Y) and (si = 16S)) or ((t = 16Y) and (si = 17S)) or ((t = 17Y) and (si = 18S)) or ((t = 27Y) and (si = 28S)) or ((t = 37Y) and (si = 38S)) or ((t = 47Y) and (si = 48S)) or ((t = 52Y) and (si = 53S))) (type: boolean) + Statistics: Num rows: 2098 Data size: 16744 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((t = 1Y) and (si = 2S)) or ((t = 2Y) and (si = 3S)) or ((t = 3Y) and (si = 4S)) or ((t = 4Y) and (si = 5S)) or ((t = 5Y) and (si = 6S)) or ((t = 6Y) and (si = 7S)) or ((t = 7Y) and (si = 8S)) or ((t = 9Y) and (si = 10S)) or ((t = 10Y) and (si = 11S)) or ((t = 11Y) and (si = 12S)) or ((t = 12Y) and (si = 13S)) or ((t = 13Y) and (si = 14S)) or ((t = 14Y) and (si = 15S)) or ((t = 15Y) and (si = 16S)) or ((t = 16Y) and (si = 17S)) or ((t = 17Y) and (si = 18S)) or ((t = 27Y) and (si = 28S)) or ((t = 37Y) and (si = 38S)) or ((t = 47Y) and (si = 48S)) or ((t = 52Y) and (si = 53S))) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column 
stats: COMPLETE + Select Operator + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(*) from over1k_n4 where ( +(t=1 and si=2) +or (t=2 and si=3) +or (t=3 and si=4) +or (t=4 and si=5) +or (t=5 and si=6) +or (t=6 and si=7) +or (t=7 and si=8) +or (t=9 and si=10) +or (t=10 and si=11) +or (t=11 and si=12) +or (t=12 and si=13) +or (t=13 and si=14) +or (t=14 and si=15) +or (t=15 and si=16) +or (t=16 and si=17) +or (t=17 and si=18) +or (t=27 and si=28) +or (t=37 and si=38) +or (t=47 and si=48) +or (t=52 and si=53)) +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n4 +#### A masked pattern was here #### +POSTHOOK: query: explain select count(*) from over1k_n4 where ( +(t=1 and si=2) +or (t=2 and si=3) +or (t=3 and si=4) +or (t=4 and si=5) +or (t=5 and si=6) +or (t=6 and si=7) +or (t=7 and si=8) +or (t=9 and si=10) +or (t=10 and si=11) +or (t=11 and si=12) +or (t=12 and si=13) +or 
(t=13 and si=14) +or (t=14 and si=15) +or (t=15 and si=16) +or (t=16 and si=17) +or (t=17 and si=18) +or (t=27 and si=28) +or (t=37 and si=38) +or (t=47 and si=48) +or (t=52 and si=53)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n4 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k_n4 + filterExpr: (((t = 1Y) and (si = 2S)) or ((t = 2Y) and (si = 3S)) or ((t = 3Y) and (si = 4S)) or ((t = 4Y) and (si = 5S)) or ((t = 5Y) and (si = 6S)) or ((t = 6Y) and (si = 7S)) or ((t = 7Y) and (si = 8S)) or ((t = 9Y) and (si = 10S)) or ((t = 10Y) and (si = 11S)) or ((t = 11Y) and (si = 12S)) or ((t = 12Y) and (si = 13S)) or ((t = 13Y) and (si = 14S)) or ((t = 14Y) and (si = 15S)) or ((t = 15Y) and (si = 16S)) or ((t = 16Y) and (si = 17S)) or ((t = 17Y) and (si = 18S)) or ((t = 27Y) and (si = 28S)) or ((t = 37Y) and (si = 38S)) or ((t = 47Y) and (si = 48S)) or ((t = 52Y) and (si = 53S))) (type: boolean) + Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((t = 1Y) and (si = 2S)) or ((t = 2Y) and (si = 3S)) or ((t = 3Y) and (si = 4S)) or ((t = 4Y) and (si = 5S)) or ((t = 5Y) and (si = 6S)) or ((t = 6Y) and (si = 7S)) or ((t = 7Y) and (si = 8S)) or ((t = 9Y) and (si = 10S)) or ((t = 10Y) and (si = 11S)) or ((t = 11Y) and (si = 12S)) or ((t = 12Y) and (si = 13S)) or ((t = 13Y) and (si = 14S)) or ((t = 14Y) and (si = 15S)) or ((t = 15Y) and (si = 16S)) or ((t = 16Y) and (si = 17S)) or ((t = 17Y) and (si = 18S)) or ((t = 27Y) and (si = 28S)) or ((t = 37Y) and (si = 38S)) or ((t = 47Y) and (si = 48S)) or ((t = 52Y) and (si = 53S))) (type: boolean) + Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE + Group 
By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out new file mode 100644 index 0000000000..6f8fad3971 --- /dev/null +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -0,0 +1,1111 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_staging +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: 
Output: default@loc_orc +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: explain select * from loc_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE 
DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select * from loc_orc where state='OH' +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where state='OH' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (state = 'OH') (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (state = 'OH') (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'OH' (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: analyze table loc_orc 
compute statistics for columns state,locid,zip,year +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +#### A masked pattern was here #### +PREHOOK: query: explain select * from loc_orc where state='OH' +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where state='OH' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (state = 'OH') (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (state = 'OH') (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'OH' (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where state!='OH' +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc 
+#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where state!='OH' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (state <> 'OH') (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (state <> 'OH') (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where state<>'OH' +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where state<>'OH' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (state <> 'OH') (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + 
predicate: (state <> 'OH') (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where zip is null +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where zip is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: zip is null (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: zip is null (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), null (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where !(zip is not null) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where !(zip is not null) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: zip is null (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: zip is null (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), null (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where zip is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where zip is not null 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: zip is not null (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: zip is not null (type: boolean) + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where !(zip is null) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where !(zip is null) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: zip is not null (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: zip is not null (type: boolean) + Statistics: Num rows: 7 Data size: 714 Basic stats: 
COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where !false +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where !false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select * from loc_orc where !true +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where !true +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: explain 
select * from loc_orc where true +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where true +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select * from loc_orc where 'foo' +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where 'foo' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: 'foo' (type: string) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where true = true +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where true = true +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select * from loc_orc where false = true +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where false = true +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where 'foo' = 'bar' +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where 'foo' = 'bar' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from 
loc_orc where false +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where state='OH' or state='CA' +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where state='OH' or state='CA' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (state) IN ('OH', 'CA') (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (state) IN ('OH', 'CA') (type: boolean) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where year=2001 and year is null 
+PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where year=2001 and year is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where year=2001 and state='OH' and state='FL' +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where year=2001 and state='OH' and state='FL' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where (year=2001 and year is null) or (state='CA') +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where (year=2001 and year is null) or (state='CA') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (state = 'CA') (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (state = 'CA') (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'CA' (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: 
COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where (year=2001 or year is null) and (state='CA') +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where (year=2001 or year is null) and (state='CA') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (((year = 2001) or year is null) and (state = 'CA')) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((year = 2001) or year is null) and (state = 'CA')) (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'CA' (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where locid < 30 +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where locid < 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (locid < 30) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid < 30) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where locid > 30 +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where locid > 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + 
TableScan + alias: loc_orc + filterExpr: (locid > 30) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 30) (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where locid <= 30 +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where locid <= 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (locid <= 30) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid <= 30) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column 
stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where locid >= 30 +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where locid >= 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (locid >= 30) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid >= 30) (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where locid < 3 +PREHOOK: 
type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where locid < 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (locid < 3) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid < 3) (type: boolean) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where locid > 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where locid > 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (locid > 3) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: 
COMPLETE + Filter Operator + predicate: (locid > 3) (type: boolean) + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where locid <= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where locid <= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (locid <= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid <= 3) (type: boolean) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE + table: + 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where locid >= 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc where locid >= 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + filterExpr: (locid >= 3) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid >= 3) (type: boolean) + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out new file mode 100644 index 0000000000..4443a0e079 --- /dev/null +++ 
b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -0,0 +1,1588 @@ +PREHOOK: query: create table if not exists loc_staging_n2 ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_staging_n2 +POSTHOOK: query: create table if not exists loc_staging_n2 ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_staging_n2 +PREHOOK: query: create table loc_orc_n2 like loc_staging_n2 +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_orc_n2 +POSTHOOK: query: create table loc_orc_n2 like loc_staging_n2 +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_orc_n2 +PREHOOK: query: alter table loc_orc_n2 set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc_n2 +PREHOOK: Output: default@loc_orc_n2 +POSTHOOK: query: alter table loc_orc_n2 set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc_n2 +POSTHOOK: Output: default@loc_orc_n2 +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging_n2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@loc_staging_n2 +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging_n2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@loc_staging_n2 +PREHOOK: query: insert overwrite table loc_orc_n2 select * from loc_staging_n2 +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging_n2 +PREHOOK: Output: default@loc_orc_n2 +POSTHOOK: query: insert overwrite table loc_orc_n2 select * from loc_staging_n2 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging_n2 +POSTHOOK: Output: default@loc_orc_n2 +POSTHOOK: Lineage: loc_orc_n2.locid SIMPLE [(loc_staging_n2)loc_staging_n2.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_n2.state SIMPLE [(loc_staging_n2)loc_staging_n2.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_n2.year SIMPLE [(loc_staging_n2)loc_staging_n2.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_n2.zip SIMPLE [(loc_staging_n2)loc_staging_n2.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: explain select * from loc_orc_n2 +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc_n2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: analyze table loc_orc_n2 compute statistics for columns state +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@loc_orc_n2 +PREHOOK: Output: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_n2 compute statistics for columns state +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@loc_orc_n2 +POSTHOOK: Output: default@loc_orc_n2 +#### A masked pattern was here #### +PREHOOK: query: explain select a, c, min(b) +from ( select state as a, locid as b, count(*) as c + from loc_orc_n2 + group by state,locid + ) sq1 
+group by a,c +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select a, c, min(b) +from ( select state as a, locid as b, count(*) as c + from loc_orc_n2 + group by state,locid + ) sq1 +group by a,c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: state (type: string), locid (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1) + keys: _col0 (type: string), _col2 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 784 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: analyze table loc_orc_n2 compute statistics for columns state,locid,year +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@loc_orc_n2 +PREHOOK: Output: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_n2 compute statistics for columns state,locid,year +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@loc_orc_n2 +POSTHOOK: Output: default@loc_orc_n2 +#### A masked pattern was here #### +PREHOOK: query: explain select year from loc_orc_n2 group by year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 
+#### A masked pattern was here #### +POSTHOOK: query: explain select year from loc_orc_n2 group by year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: year (type: int) + outputColumnNames: year + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: year (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by 
state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with 
rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by rollup( state,locid ) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by rollup( state,locid ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column 
stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state)) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 
784 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid)) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce 
Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid),()) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + 
Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state,locid),(state),(locid),()) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state,locid),(state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, 
_col1, _col2 + Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select year from loc_orc_n2 group by year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select year from loc_orc_n2 group by year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: year (type: int) + outputColumnNames: year + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: year (type: int) + minReductionHashAggr: 
0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: 
_col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,zip from loc_orc_n2 group by state,zip +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,zip from loc_orc_n2 group by state,zip +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), zip (type: bigint) + outputColumnNames: state, zip + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE + Group By 
Operator + keys: state (type: string), zip (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: 
COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by rollup (state,locid) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by rollup (state,locid) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + 
Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state)) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state)) +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid)) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A 
masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid),()) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state,locid),(state),(locid),()) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state,locid),(state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + 
pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select year from loc_orc_n2 group by year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select year from loc_orc_n2 group by year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: year (type: int) + outputColumnNames: year + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: year (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n2 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File 
Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table t1_uq12(i int, j int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1_uq12 +POSTHOOK: query: create table t1_uq12(i int, j int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1_uq12 +PREHOOK: query: alter table t1_uq12 update statistics set('numRows'='10000', 'rawDataSize'='18000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t1_uq12 +PREHOOK: Output: default@t1_uq12 +POSTHOOK: query: alter table t1_uq12 update statistics set('numRows'='10000', 'rawDataSize'='18000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t1_uq12 +POSTHOOK: Output: default@t1_uq12 +PREHOOK: query: alter table t1_uq12 update statistics for column i set('numDVs'='2500','numNulls'='50','highValue'='1000','lowValue'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t1_uq12 +PREHOOK: Output: default@t1_uq12 +POSTHOOK: query: alter table t1_uq12 update statistics for column i set('numDVs'='2500','numNulls'='50','highValue'='1000','lowValue'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t1_uq12 +POSTHOOK: Output: default@t1_uq12 +PREHOOK: query: alter table t1_uq12 update statistics for column j set('numDVs'='500','numNulls'='30','highValue'='100','lowValue'='50') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t1_uq12 +PREHOOK: Output: default@t1_uq12 +POSTHOOK: query: alter table t1_uq12 update statistics for column j 
set('numDVs'='500','numNulls'='30','highValue'='100','lowValue'='50') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t1_uq12 +POSTHOOK: Output: default@t1_uq12 +PREHOOK: query: create table t2_uq12(i2 int, j2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2_uq12 +POSTHOOK: query: create table t2_uq12(i2 int, j2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2_uq12 +PREHOOK: query: alter table t2_uq12 update statistics set('numRows'='100000000', 'rawDataSize'='10000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t2_uq12 +PREHOOK: Output: default@t2_uq12 +POSTHOOK: query: alter table t2_uq12 update statistics set('numRows'='100000000', 'rawDataSize'='10000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t2_uq12 +POSTHOOK: Output: default@t2_uq12 +PREHOOK: query: alter table t2_uq12 update statistics for column i2 set('numDVs'='10000000','numNulls'='0','highValue'='8000','lowValue'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t2_uq12 +PREHOOK: Output: default@t2_uq12 +POSTHOOK: query: alter table t2_uq12 update statistics for column i2 set('numDVs'='10000000','numNulls'='0','highValue'='8000','lowValue'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t2_uq12 +POSTHOOK: Output: default@t2_uq12 +PREHOOK: query: alter table t2_uq12 update statistics for column j2 set('numDVs'='10','numNulls'='0','highValue'='800','lowValue'='-1') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t2_uq12 +PREHOOK: Output: default@t2_uq12 +POSTHOOK: query: alter table t2_uq12 update statistics for column j2 set('numDVs'='10','numNulls'='0','highValue'='800','lowValue'='-1') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t2_uq12 +POSTHOOK: Output: default@t2_uq12 +PREHOOK: query: explain select count (1) from 
t1_uq12,t2_uq12 where t1_uq12.j=t2_uq12.i2 group by t1_uq12.i, t1_uq12.j +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_uq12 +PREHOOK: Input: default@t2_uq12 +#### A masked pattern was here #### +POSTHOOK: query: explain select count (1) from t1_uq12,t2_uq12 where t1_uq12.j=t2_uq12.i2 group by t1_uq12.i, t1_uq12.j +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_uq12 +POSTHOOK: Input: default@t2_uq12 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_uq12 + filterExpr: j is not null (type: boolean) + Statistics: Num rows: 10000 Data size: 79688 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: j is not null (type: boolean) + Statistics: Num rows: 9970 Data size: 79448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9970 Data size: 79448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 9970 Data size: 79448 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + TableScan + alias: t2_uq12 + filterExpr: i2 is not null (type: boolean) + Statistics: Num rows: 100000000 Data size: 400000000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: i2 is not null (type: boolean) + Statistics: Num rows: 100000000 Data size: 400000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 100000000 Data size: 400000000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null 
sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100000000 Data size: 400000000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 99700 Data size: 797288 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: int), _col1 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9970 Data size: 159496 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 9970 Data size: 159496 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9970 Data size: 159496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 9970 Data size: 79760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 9970 Data size: 79760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table t1_uq12 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1_uq12 +PREHOOK: Output: default@t1_uq12 +POSTHOOK: query: drop table t1_uq12 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1_uq12 +POSTHOOK: Output: default@t1_uq12 +PREHOOK: query: drop table t2_uq12 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t2_uq12 +PREHOOK: Output: default@t2_uq12 +POSTHOOK: query: drop table t2_uq12 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t2_uq12 +POSTHOOK: Output: default@t2_uq12 diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out new file mode 100644 index 0000000000..e604e13ee8 --- /dev/null +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out @@ -0,0 +1,441 @@ +PREHOOK: query: drop table location +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table location +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table location (state string, country string, votes bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@location +POSTHOOK: query: create table location (state string, country string, votes bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@location +PREHOOK: query: load data local inpath "../../data/files/location.txt" overwrite into table location +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@location +POSTHOOK: query: load data local inpath "../../data/files/location.txt" overwrite into table location +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@location +PREHOOK: query: analyze 
table location compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@location +PREHOOK: Output: default@location +POSTHOOK: query: analyze table location compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@location +POSTHOOK: Output: default@location +PREHOOK: query: analyze table location compute statistics for columns state, country +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@location +PREHOOK: Output: default@location +#### A masked pattern was here #### +POSTHOOK: query: analyze table location compute statistics for columns state, country +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@location +POSTHOOK: Output: default@location +#### A masked pattern was here #### +PREHOOK: query: explain select state, country from location group by state, country +PREHOOK: type: QUERY +PREHOOK: Input: default@location +#### A masked pattern was here #### +POSTHOOK: query: explain select state, country from location group by state, country +POSTHOOK: type: QUERY +POSTHOOK: Input: default@location +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: location + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), country (type: string) + outputColumnNames: state, country + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), country (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num 
rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state, country from location group by state, country with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@location +#### A masked pattern was here #### +POSTHOOK: query: explain select state, country from location group by state, country with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@location +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: location + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), country (type: string) + outputColumnNames: state, country + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), country (type: string), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + 
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state, country from location group by state, country +PREHOOK: type: QUERY +PREHOOK: Input: default@location +#### A masked pattern was here #### +POSTHOOK: query: explain select state, country from location group by state, country +POSTHOOK: type: QUERY +POSTHOOK: Input: default@location +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: location + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), country (type: string) + outputColumnNames: state, country + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), country (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1730 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output 
Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 10 Data size: 1730 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state, votes from location group by state, votes +PREHOOK: type: QUERY +PREHOOK: Input: default@location +#### A masked pattern was here #### +POSTHOOK: query: explain select state, votes from location group by state, votes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@location +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: location + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: state (type: string), votes (type: bigint) + outputColumnNames: state, votes + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + keys: state (type: string), votes (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column 
stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state, country from location group by state, country with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@location +#### A masked pattern was here #### +POSTHOOK: query: explain select state, country from location group by state, country with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@location +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: location + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), country (type: string) + outputColumnNames: state, country + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), country (type: string), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: 
_col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1448 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 1448 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state, country from location group by state, country +PREHOOK: type: QUERY +PREHOOK: Input: default@location +#### A masked pattern was here #### +POSTHOOK: query: explain select state, country from location group by state, country +POSTHOOK: type: QUERY +POSTHOOK: Input: default@location +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: location + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), country (type: string) + outputColumnNames: state, country + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE 
Column stats: COMPLETE + Group By Operator + keys: state (type: string), country (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state, country from location group by state, country with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@location +#### A masked pattern was here #### +POSTHOOK: query: explain select state, country from location group by state, country with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@location +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: location + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), country (type: string) + outputColumnNames: state, 
country + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), country (type: string), 0L (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 80 Data size: 14480 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Statistics: Num rows: 80 Data size: 14480 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1448 Basic stats: COMPLETE Column stats: COMPLETE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 1448 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table location +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@location +PREHOOK: Output: default@location +POSTHOOK: query: drop table location +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@location +POSTHOOK: Output: default@location diff --git a/ql/src/test/results/clientpositive/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/annotate_stats_join.q.out new file mode 100644 index 0000000000..f054af2a24 --- /dev/null +++ b/ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -0,0 +1,1097 @@ 
+PREHOOK: query: create table if not exists emp_n2 ( + lastname string, + deptid int, + locid int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emp_n2 +POSTHOOK: query: create table if not exists emp_n2 ( + lastname string, + deptid int, + locid int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emp_n2 +PREHOOK: query: create table if not exists dept_n1 ( + deptid int, + deptname string +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dept_n1 +POSTHOOK: query: create table if not exists dept_n1 ( + deptid int, + deptname string +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dept_n1 +PREHOOK: query: create table if not exists loc ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc +POSTHOOK: query: create table if not exists loc ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_n2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@emp_n2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_n2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@emp_n2 +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/dept.txt' OVERWRITE INTO TABLE dept_n1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dept_n1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_n1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dept_n1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@loc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@loc +PREHOOK: query: analyze table emp_n2 compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_n2 +PREHOOK: Output: default@emp_n2 +POSTHOOK: query: analyze table emp_n2 compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_n2 +POSTHOOK: Output: default@emp_n2 +PREHOOK: query: analyze table dept_n1 compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_n1 +PREHOOK: Output: default@dept_n1 +POSTHOOK: query: analyze table dept_n1 compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_n1 +POSTHOOK: Output: default@dept_n1 +PREHOOK: query: analyze table loc compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@loc +PREHOOK: Output: default@loc +POSTHOOK: query: analyze table loc compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc +POSTHOOK: Output: default@loc +PREHOOK: query: analyze table emp_n2 compute statistics for columns lastname,deptid,locid +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@emp_n2 +PREHOOK: Output: default@emp_n2 +#### A masked pattern was here #### +POSTHOOK: query: analyze table emp_n2 compute statistics for columns lastname,deptid,locid +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@emp_n2 +POSTHOOK: Output: default@emp_n2 +#### A masked 
pattern was here #### +PREHOOK: query: analyze table dept_n1 compute statistics for columns deptname,deptid +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@dept_n1 +PREHOOK: Output: default@dept_n1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table dept_n1 compute statistics for columns deptname,deptid +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@dept_n1 +POSTHOOK: Output: default@dept_n1 +#### A masked pattern was here #### +PREHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@loc +PREHOOK: Output: default@loc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@loc +POSTHOOK: Output: default@loc +#### A masked pattern was here #### +PREHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid) +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_n1 +PREHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_n1 +POSTHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: e + filterExpr: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column 
stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int) + TableScan + alias: d + filterExpr: deptid is not null (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from emp_n2,dept_n1 where emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_n1 +PREHOOK: Input: 
default@emp_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from emp_n2,dept_n1 where emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_n1 +POSTHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emp_n2 + filterExpr: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: dept_n1 + filterExpr: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 
(type: string), _col0 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid and e.lastname = d.deptname) +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_n1 +PREHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid and e.lastname = d.deptname) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_n1 +POSTHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: e + filterExpr: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + 
expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: d + filterExpr: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from emp_n2,dept_n1 where emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_n1 +PREHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from emp_n2,dept_n1 where emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_n1 +POSTHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emp_n2 + filterExpr: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: dept_n1 + filterExpr: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator 
+ predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid) join emp_n2 e1 on (e.deptid = e1.deptid) +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_n1 +PREHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid) join emp_n2 e1 on (e.deptid = e1.deptid) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_n1 +POSTHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: 
Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: e + filterExpr: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int) + TableScan + alias: d + filterExpr: deptid is not null (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + 
compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + TableScan + alias: e1 + filterExpr: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid) +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_n1 +PREHOOK: Input: default@emp_n2 +PREHOOK: Input: default@loc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_n1 +POSTHOOK: Input: default@emp_n2 +POSTHOOK: Input: default@loc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: e + filterExpr: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int) + TableScan + alias: d + filterExpr: deptid is not null (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: 
boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + TableScan + alias: l + filterExpr: locid is not null (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: locid is not null (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 
Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: bigint), _col3 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state) +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_n1 +PREHOOK: Input: default@emp_n2 +PREHOOK: Input: default@loc +#### A masked pattern was here #### +POSTHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_n1 +POSTHOOK: Input: default@emp_n2 +POSTHOOK: Input: default@loc +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: e + filterExpr: (deptid is 
not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: d + filterExpr: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: string) + TableScan + alias: l + filterExpr: (locid is not null and state is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid is not null and state is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: 
COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from emp_n2 left outer join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_n1 +PREHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from emp_n2 left outer join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_n1 +POSTHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emp_n2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: dept_n1 + filterExpr: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data 
size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from emp_n2 left semi join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_n1 +PREHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from emp_n2 left semi join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_n1 
+POSTHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emp_n2 + filterExpr: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: dept_n1 + filterExpr: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptname (type: string), deptid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: 
string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from emp_n2 right outer join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_n1 +PREHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from emp_n2 right outer join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_n1 +POSTHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emp_n2 + filterExpr: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: 
boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: dept_n1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: 
explain select * from emp_n2 full outer join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_n1 +PREHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from emp_n2 full outer join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_n1 +POSTHOOK: Input: default@emp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emp_n2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: dept_n1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out new file mode 100644 index 0000000000..21a1b12feb --- /dev/null +++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -0,0 +1,1465 @@ +PREHOOK: query: drop table store_sales_n0 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table store_sales_n0 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table store_n0 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table store_n0 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table customer_address +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table customer_address +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table store_sales_n0 +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + 
ss_net_paid_inc_tax float, + ss_net_profit float +) +row format delimited fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales_n0 +POSTHOOK: query: create table store_sales_n0 +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + ss_net_paid_inc_tax float, + ss_net_profit float +) +row format delimited fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales_n0 +PREHOOK: query: create table store_n0 +( + s_store_sk int, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk int, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset float, + s_tax_precentage float +) +row format delimited fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_n0 +POSTHOOK: query: create table store_n0 +( + s_store_sk int, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk int, + s_store_name string, + s_number_employees int, + s_floor_space 
int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset float, + s_tax_precentage float +) +row format delimited fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_n0 +PREHOOK: query: create table store_bigint +( + s_store_sk bigint, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk int, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset float, + s_tax_precentage float +) +row format delimited fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_bigint +POSTHOOK: query: create table store_bigint +( + s_store_sk bigint, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk int, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type 
string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset float, + s_tax_precentage float +) +row format delimited fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_bigint +PREHOOK: query: create table customer_address +( + ca_address_sk int, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset float, + ca_location_type string +) +row format delimited fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer_address +POSTHOOK: query: create table customer_address +( + ca_address_sk int, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset float, + ca_location_type string +) +row format delimited fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer_address +PREHOOK: query: load data local inpath '../../data/files/store.txt' overwrite into table store_n0 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@store_n0 +POSTHOOK: query: load data local inpath '../../data/files/store.txt' overwrite into table store_n0 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@store_n0 +PREHOOK: query: load data local inpath '../../data/files/store.txt' overwrite into table store_bigint +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@store_bigint +POSTHOOK: query: load data local inpath '../../data/files/store.txt' overwrite into 
table store_bigint +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@store_bigint +PREHOOK: query: load data local inpath '../../data/files/store_sales.txt' overwrite into table store_sales_n0 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@store_sales_n0 +POSTHOOK: query: load data local inpath '../../data/files/store_sales.txt' overwrite into table store_sales_n0 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@store_sales_n0 +PREHOOK: query: load data local inpath '../../data/files/customer_address.txt' overwrite into table customer_address +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@customer_address +POSTHOOK: query: load data local inpath '../../data/files/customer_address.txt' overwrite into table customer_address +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@customer_address +PREHOOK: query: analyze table store_n0 compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@store_n0 +PREHOOK: Output: default@store_n0 +POSTHOOK: query: analyze table store_n0 compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_n0 +POSTHOOK: Output: default@store_n0 +PREHOOK: query: analyze table store_n0 compute statistics for columns s_store_sk, s_floor_space +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@store_n0 +PREHOOK: Output: default@store_n0 +#### A masked pattern was here #### +POSTHOOK: query: analyze table store_n0 compute statistics for columns s_store_sk, s_floor_space +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@store_n0 +POSTHOOK: Output: default@store_n0 +#### A masked pattern was here #### +PREHOOK: query: analyze table store_bigint compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@store_bigint +PREHOOK: Output: default@store_bigint +POSTHOOK: query: analyze table store_bigint compute statistics +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@store_bigint +POSTHOOK: Output: default@store_bigint +PREHOOK: query: analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@store_bigint +PREHOOK: Output: default@store_bigint +#### A masked pattern was here #### +POSTHOOK: query: analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@store_bigint +POSTHOOK: Output: default@store_bigint +#### A masked pattern was here #### +PREHOOK: query: analyze table store_sales_n0 compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales_n0 +PREHOOK: Output: default@store_sales_n0 +POSTHOOK: query: analyze table store_sales_n0 compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_sales_n0 +POSTHOOK: Output: default@store_sales_n0 +PREHOOK: query: analyze table store_sales_n0 compute statistics for columns ss_store_sk, ss_addr_sk, ss_quantity +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@store_sales_n0 +PREHOOK: Output: default@store_sales_n0 +#### A masked pattern was here #### +POSTHOOK: query: analyze table store_sales_n0 compute statistics for columns ss_store_sk, ss_addr_sk, ss_quantity +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@store_sales_n0 +POSTHOOK: Output: default@store_sales_n0 +#### A masked pattern was here #### +PREHOOK: query: analyze table customer_address compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Output: default@customer_address +POSTHOOK: query: analyze table customer_address compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Output: default@customer_address +PREHOOK: query: analyze table customer_address compute statistics for columns ca_address_sk +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@customer_address +PREHOOK: Output: 
default@customer_address +#### A masked pattern was here #### +POSTHOOK: query: analyze table customer_address compute statistics for columns ca_address_sk +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@customer_address +POSTHOOK: Output: default@customer_address +#### A masked pattern was here #### +PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) +PREHOOK: type: QUERY +PREHOOK: Input: default@store_n0 +PREHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_n0 +POSTHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: s + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: ss + filterExpr: ss_store_sk is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ss_store_sk is not null (type: boolean) + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE 
Column stats: COMPLETE + Select Operator + expressions: ss_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select s.s_store_sk from store_bigint s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) +PREHOOK: type: QUERY +PREHOOK: Input: default@store_bigint +PREHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select s.s_store_sk from store_bigint s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_bigint +POSTHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: s + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: s_store_sk is not null (type: 
boolean) + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: ss + filterExpr: UDFToLong(ss_store_sk) is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: UDFToLong(ss_store_sk) is not null (type: boolean) + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToLong(ss_store_sk) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 964 Data size: 7440 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 964 Data size: 7440 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 964 Data size: 7712 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 964 Data size: 7712 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select 
s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_n0 +PREHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_n0 +POSTHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: s + filterExpr: (s_store_sk > 0) (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (s_store_sk > 0) (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: ss + filterExpr: (ss_store_sk > 0) (type: boolean) + Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (ss_store_sk > 0) (type: boolean) + Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition 
columns: _col0 (type: int) + Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where s.s_company_id > 0 and ss.ss_quantity > 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_n0 +PREHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where s.s_company_id > 0 and ss.ss_quantity > 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_n0 +POSTHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: s + filterExpr: ((s_company_id > 0) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: ((s_company_id > 0) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: PARTIAL + TableScan + alias: ss + filterExpr: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 876 Data size: 6724 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 876 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 876 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 90 Data size: 360 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 90 Data size: 360 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where s.s_floor_space > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_n0 +PREHOOK: Input: 
default@store_sales_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where s.s_floor_space > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_n0 +POSTHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: s + filterExpr: ((s_floor_space > 0) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((s_floor_space > 0) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: ss + filterExpr: ss_store_sk is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ss_store_sk is not null (type: boolean) + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where ss.ss_quantity > 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_n0 +PREHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where ss.ss_quantity > 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_n0 +POSTHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: s + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition 
columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: ss + filterExpr: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 876 Data size: 6724 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 876 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 876 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 876 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 876 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) +PREHOOK: type: QUERY +PREHOOK: Input: default@store_n0 +PREHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join 
store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_n0 +POSTHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + filterExpr: ss_store_sk is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ss_store_sk is not null (type: boolean) + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: s + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 964 Data size: 7576 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 964 Data size: 7576 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + TableScan + alias: s1 + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 1000 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_n0 +PREHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 1000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_n0 +POSTHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + filterExpr: (ss_store_sk > 1000) (type: boolean) + Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (ss_store_sk > 1000) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: s + filterExpr: (s_store_sk > 1000) (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + 
Filter Operator + predicate: (s_store_sk > 1000) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + TableScan + alias: s1 + filterExpr: (s_store_sk > 1000) (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (s_store_sk > 1000) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: 
_col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) where s.s_floor_space > 1000 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_n0 +PREHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) where s.s_floor_space > 1000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_n0 +POSTHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + filterExpr: ss_store_sk is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ss_store_sk is not 
null (type: boolean) + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: s + filterExpr: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 964 Data size: 7576 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition 
columns: _col0 (type: int) + Statistics: Num rows: 964 Data size: 7576 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + TableScan + alias: s1 + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) where ss.ss_quantity > 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_n0 +PREHOOK: Input: 
default@store_sales_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) where ss.ss_quantity > 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_n0 +POSTHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + filterExpr: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 876 Data size: 6724 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 876 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 876 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: s + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 
(type: int) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 876 Data size: 6884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 876 Data size: 6884 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + TableScan + alias: s1 + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 876 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + 
Statistics: Num rows: 876 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 876 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join customer_address ca on (ca.ca_address_sk = ss.ss_addr_sk) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@store_n0 +PREHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join customer_address ca on (ca.ca_address_sk = ss.ss_addr_sk) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@store_n0 +POSTHOOK: Input: default@store_sales_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + filterExpr: (ss_addr_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1000 Data size: 7664 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (ss_addr_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_addr_sk (type: int), ss_store_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + TableScan + alias: s + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 916 Data size: 7148 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 916 Data size: 7148 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + TableScan + alias: ca + filterExpr: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20 Data size: 80 Basic stats: 
COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 223 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 223 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 223 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table store_sales_n0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@store_sales_n0 +PREHOOK: Output: default@store_sales_n0 +POSTHOOK: query: drop table store_sales_n0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@store_sales_n0 +POSTHOOK: Output: default@store_sales_n0 +PREHOOK: query: drop table store_n0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@store_n0 +PREHOOK: Output: default@store_n0 +POSTHOOK: query: drop table store_n0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@store_n0 +POSTHOOK: Output: default@store_n0 
+PREHOOK: query: drop table store_bigint +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@store_bigint +PREHOOK: Output: default@store_bigint +POSTHOOK: query: drop table store_bigint +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@store_bigint +POSTHOOK: Output: default@store_bigint +PREHOOK: query: drop table customer_address +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@customer_address +PREHOOK: Output: default@customer_address +POSTHOOK: query: drop table customer_address +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@customer_address +POSTHOOK: Output: default@customer_address diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_limit.q.out b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out similarity index 87% rename from ql/src/test/results/clientpositive/llap/annotate_stats_limit.q.out rename to ql/src/test/results/clientpositive/annotate_stats_limit.q.out index b7bc38917b..ec1dc8bb25 100644 --- a/ql/src/test/results/clientpositive/llap/annotate_stats_limit.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -80,9 +80,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n5 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select * from loc_orc_n5 limit 4 @@ -103,11 +105,14 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n5 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 4 + 
Statistics: Num rows: 4 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select * from loc_orc_n5 limit 16 @@ -128,11 +133,14 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n5 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 16 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select * from loc_orc_n5 limit 0 diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out similarity index 75% rename from ql/src/test/results/clientpositive/llap/annotate_stats_part.q.out rename to ql/src/test/results/clientpositive/annotate_stats_part.q.out index fb0b15cb83..31fa1426a2 100644 --- a/ql/src/test/results/clientpositive/llap/annotate_stats_part.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -58,9 +58,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n4 + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: insert overwrite table loc_orc_n4 partition(year) select * from loc_staging_n4 @@ -100,9 +102,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n4 + Statistics: Num rows: 20 Data size: 7208 Basic stats: PARTIAL Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: 
string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20 Data size: 7208 Basic stats: PARTIAL Column stats: PARTIAL ListSink PREHOOK: query: analyze table loc_orc_n4 partition(year='2001') compute statistics @@ -136,9 +140,11 @@ STAGE PLANS: TableScan alias: loc_orc_n4 filterExpr: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean) + Statistics: Num rows: 9 Data size: 1764 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9 Data size: 1764 Basic stats: PARTIAL Column stats: NONE ListSink PREHOOK: query: explain select * from loc_orc_n4 @@ -163,9 +169,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n4 + Statistics: Num rows: 16 Data size: 6080 Basic stats: PARTIAL Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 16 Data size: 6080 Basic stats: PARTIAL Column stats: PARTIAL ListSink PREHOOK: query: explain select * from loc_orc_n4 where year='2001' @@ -189,9 +197,11 @@ STAGE PLANS: TableScan alias: loc_orc_n4 filterExpr: (year = '2001') (type: boolean) + Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), '2001' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: analyze table loc_orc_n4 partition(year) compute statistics @@ -227,9 +237,11 @@ STAGE PLANS: TableScan alias: loc_orc_n4 filterExpr: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean) + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 
state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select * from loc_orc_n4 @@ -254,9 +266,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n4 + Statistics: Num rows: 8 Data size: 3040 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 3040 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select * from loc_orc_n4 where year='2001' or year='__HIVE_DEFAULT_PARTITION__' @@ -282,9 +296,11 @@ STAGE PLANS: TableScan alias: loc_orc_n4 filterExpr: (year) IN ('2001', '__HIVE_DEFAULT_PARTITION__') (type: boolean) + Statistics: Num rows: 8 Data size: 3040 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 3040 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select * from loc_orc_n4 where year='2001' and year='__HIVE_DEFAULT_PARTITION__' @@ -341,9 +357,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n4 + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: zip (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select state from loc_orc_n4 @@ -368,9 +386,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n4 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string) outputColumnNames: 
_col0 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select year from loc_orc_n4 @@ -395,9 +415,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n4 + Statistics: Num rows: 8 Data size: 2246 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: year (type: string) outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select state,locid from loc_orc_n4 @@ -422,9 +444,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n4 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select state,locid from loc_orc_n4 where year='2001' @@ -448,9 +472,11 @@ STAGE PLANS: TableScan alias: loc_orc_n4 filterExpr: (year = '2001') (type: boolean) + Statistics: Num rows: 7 Data size: 630 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 630 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select state,locid from loc_orc_n4 where year!='2001' @@ -474,9 +500,11 @@ STAGE PLANS: TableScan alias: loc_orc_n4 filterExpr: (year <> '2001') (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select * from loc_orc_n4 @@ -501,9 +529,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n4 + Statistics: Num rows: 8 Data size: 2192 
Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2192 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select locid from loc_orc_n4 where locid>0 and year='2001' @@ -517,22 +547,38 @@ POSTHOOK: Input: default@loc_orc_n4 POSTHOOK: Input: default@loc_orc_n4@year=2001 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n4 + filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: locid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: loc_orc_n4 - filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) - Filter Operator - predicate: (locid > 0) (type: boolean) - Select Operator - expressions: locid (type: int) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: explain select locid,year from loc_orc_n4 where locid>0 and year='2001' PREHOOK: type: QUERY @@ -545,22 +591,38 @@ POSTHOOK: 
Input: default@loc_orc_n4 POSTHOOK: Input: default@loc_orc_n4@year=2001 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n4 + filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: locid (type: int), '2001' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: loc_orc_n4 - filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) - Filter Operator - predicate: (locid > 0) (type: boolean) - Select Operator - expressions: locid (type: int), '2001' (type: string) - outputColumnNames: _col0, _col1 - ListSink + ListSink PREHOOK: query: explain select * from (select locid,year from loc_orc_n4) test where locid>0 and year='2001' PREHOOK: type: QUERY @@ -573,20 +635,36 @@ POSTHOOK: Input: default@loc_orc_n4 POSTHOOK: Input: default@loc_orc_n4@year=2001 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n4 + filterExpr: 
((locid > 0) and (year = '2001')) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: locid (type: int), '2001' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: loc_orc_n4 - filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) - Filter Operator - predicate: (locid > 0) (type: boolean) - Select Operator - expressions: locid (type: int), '2001' (type: string) - outputColumnNames: _col0, _col1 - ListSink + ListSink diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_select.q.out b/ql/src/test/results/clientpositive/annotate_stats_select.q.out similarity index 63% rename from ql/src/test/results/clientpositive/llap/annotate_stats_select.q.out rename to ql/src/test/results/clientpositive/annotate_stats_select.q.out index 91774d4ee4..dba27998a4 100644 --- a/ql/src/test/results/clientpositive/llap/annotate_stats_select.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -107,9 +107,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 3554 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 
(type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map), l1 (type: array), st1 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 2 Data size: 3554 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1 @@ -140,9 +142,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 3356 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map), l1 (type: array), st1 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 2 Data size: 3356 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select bo1 from alltypes_orc @@ -163,9 +167,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: bo1 (type: boolean) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select i1 as int1 from alltypes_orc @@ -186,9 +192,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i1 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: 
COMPLETE ListSink PREHOOK: query: explain select s1 from alltypes_orc @@ -209,9 +217,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s1 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select m1 from alltypes_orc @@ -232,9 +242,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1840 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: m1 (type: map) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 1840 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc @@ -255,9 +267,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 246 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), s1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 2 Data size: 246 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select null from alltypes_orc @@ -278,9 +292,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: null (type: void) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select 11 from alltypes_orc @@ -301,9 +317,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator expressions: 11 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select 11L from alltypes_orc @@ -324,9 +342,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 11L (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select 11.0 from alltypes_orc @@ -347,9 +367,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 11 (type: decimal(2,0)) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select "hello" from alltypes_orc @@ -370,9 +392,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'hello' (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select cast("hello" as char(5)) from alltypes_orc @@ -393,9 +417,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'hello' (type: char(5)) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select cast("hello" as varchar(5)) from alltypes_orc @@ -416,9 +442,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator expressions: 'hello' (type: varchar(5)) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select unbase64("0xe23") from alltypes_orc @@ -439,9 +467,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: D317B6 (type: binary) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc @@ -462,9 +492,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 1Y (type: tinyint), 20S (type: smallint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc @@ -485,9 +517,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: TIMESTAMP'1970-12-31 15:59:58.174' (type: timestamp) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc @@ -508,9 +542,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: DATE'1970-12-31' (type: date) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select cast("58.174" as DECIMAL) from 
alltypes_orc @@ -531,9 +567,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 58 (type: decimal(10,0)) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select array(1,2,3) from alltypes_orc @@ -545,19 +583,33 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_orc #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(1,2,3) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypes_orc - Select Operator - expressions: array(1,2,3) (type: array) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: explain select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc PREHOOK: type: QUERY @@ -568,19 +620,33 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_orc #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypes_orc + Statistics: Num rows: 2 
Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: str_to_map('a=1 b=2 c=3',' ','=') (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypes_orc - Select Operator - expressions: str_to_map('a=1 b=2 c=3',' ','=') (type: map) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: explain select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc PREHOOK: type: QUERY @@ -591,19 +657,33 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_orc #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: named_struct('a',11,'b',11) (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypes_orc - Select Operator - expressions: 
named_struct('a',11,'b',11) (type: struct) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: explain select CREATE_UNION(0, "hello") from alltypes_orc PREHOOK: type: QUERY @@ -614,19 +694,33 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_orc #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: create_union(0,'hello') (type: uniontype) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypes_orc - Select Operator - expressions: create_union(0,'hello') (type: uniontype) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: explain select count(*) from alltypes_orc PREHOOK: type: QUERY @@ -682,9 +776,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 3356 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map), l1 (type: array), st1 (type: struct), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 2 Data size: 3364 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select i1 from (select i1 from alltypes_orc limit 10) temp @@ -705,11 +801,14 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i1 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select i1 from (select i1,11 from alltypes_orc limit 10) temp @@ -730,11 +829,14 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i1 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp @@ -751,53 +853,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypes_orc - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - 
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int) - outputColumnNames: _col0 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypes_orc + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), 'hello' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), 'hello' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 186 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -819,46 +911,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypes_orc - Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 11 (type: decimal(2,0)) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Limit + Number of rows: 10 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: decimal(2,0)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -880,53 +962,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypes_orc - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int) - outputColumnNames: 
_col0 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypes_orc + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), 'hello' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), 'hello' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -948,46 +1020,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypes_orc - Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'hello' (type: string), 11 (type: decimal(2,0)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory 
Usage: 0.1 + Reduce Operator Tree: + Limit + Number of rows: 10 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'hello' (type: string), 11 (type: decimal(2,0)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1004,21 +1066,36 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_orc #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypes_orc + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: bo1 (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: bo1 (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypes_orc - Filter Operator - predicate: bo1 (type: boolean) - Select Operator - expressions: bo1 (type: boolean) - outputColumnNames: _col0 - ListSink + 
ListSink PREHOOK: query: explain select bo1 from alltypes_orc where !bo1 PREHOOK: type: QUERY @@ -1029,20 +1106,35 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_orc #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypes_orc + filterExpr: (not bo1) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (not bo1) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: bo1 (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: alltypes_orc - filterExpr: (not bo1) (type: boolean) - Filter Operator - predicate: (not bo1) (type: boolean) - Select Operator - expressions: bo1 (type: boolean) - outputColumnNames: _col0 - ListSink + ListSink diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_table.q.out b/ql/src/test/results/clientpositive/annotate_stats_table.q.out similarity index 71% rename from ql/src/test/results/clientpositive/llap/annotate_stats_table.q.out rename to ql/src/test/results/clientpositive/annotate_stats_table.q.out index ab3c65005d..9b9e31b214 100644 --- a/ql/src/test/results/clientpositive/llap/annotate_stats_table.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -46,9 +46,11 @@ STAGE 
PLANS: Processor Tree: TableScan alias: emp_orc + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging @@ -87,9 +89,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc + Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: analyze table emp_orc compute statistics @@ -118,9 +122,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc + Statistics: Num rows: 48 Data size: 8836 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 48 Data size: 8836 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: analyze table emp_orc compute statistics for columns deptid @@ -151,9 +157,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc + Statistics: Num rows: 48 Data size: 8840 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 48 Data size: 8840 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select deptid from emp_orc @@ -174,9 +182,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc + Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: deptid (type: int) outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: 
COMPLETE ListSink PREHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid @@ -207,9 +217,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc + Statistics: Num rows: 48 Data size: 4560 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 48 Data size: 4560 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select lastname from emp_orc @@ -230,9 +242,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc + Statistics: Num rows: 48 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: lastname (type: string) outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select deptid from emp_orc @@ -253,9 +267,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc + Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: deptid (type: int) outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select lastname,deptid from emp_orc @@ -276,9 +292,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc + Statistics: Num rows: 48 Data size: 4560 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 48 Data size: 4560 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: create table tmp_n0 as select 1 @@ -304,40 +322,51 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@tmp_n0 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2, Stage-0 - Stage-0 depends on stages: Stage-1 + Stage-6 depends on stages: 
Stage-1 , consists of Stage-3, Stage-2, Stage-4 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2, Stage-5 + Stage-7 depends on stages: Stage-0 + Stage-2 + Stage-4 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmp_n0 - Execution mode: llap - LLAP IO: no inputs + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_n0 - Stage: Stage-2 - Dependency Collection + Stage: Stage-6 + Conditional Operator Stage: Stage-3 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + 
Stage: Stage-7 Create Table columns: _c0 int name: default.tmp_n0 @@ -345,7 +374,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_n0 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_n0 + + Stage: Stage-5 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/annotate_stats_udtf.q.out b/ql/src/test/results/clientpositive/annotate_stats_udtf.q.out new file mode 100644 index 0000000000..c094fc1fea --- /dev/null +++ b/ql/src/test/results/clientpositive/annotate_stats_udtf.q.out @@ -0,0 +1,275 @@ +PREHOOK: query: drop table if exists HIVE_20262 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists HIVE_20262 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table HIVE_20262 (a array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@HIVE_20262 +POSTHOOK: query: create table HIVE_20262 (a array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@HIVE_20262 +PREHOOK: query: insert into HIVE_20262 select array(1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hive_20262 +POSTHOOK: query: insert into HIVE_20262 select array(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: 
_dummy_database@_dummy_table +POSTHOOK: Output: default@hive_20262 +POSTHOOK: Lineage: hive_20262.a EXPRESSION [] +PREHOOK: query: insert into HIVE_20262 select array(2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hive_20262 +POSTHOOK: query: insert into HIVE_20262 select array(2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hive_20262 +POSTHOOK: Lineage: hive_20262.a EXPRESSION [] +PREHOOK: query: explain select explode(array(1,2,3,4,5)) as col +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: explain select explode(array(1,2,3,4,5)) as col +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(1,2,3,4,5) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 5 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Select Operator + expressions: col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
ListSink + +PREHOOK: query: explain select explode(a) from HIVE_20262 +PREHOOK: type: QUERY +PREHOOK: Input: default@hive_20262 +#### A masked pattern was here #### +POSTHOOK: query: explain select explode(a) from HIVE_20262 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive_20262 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hive_20262 + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + function name: explode + Select Operator + expressions: col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select 1, r from HIVE_20262 + lateral view explode(a) t as r +PREHOOK: type: QUERY +PREHOOK: Input: default@hive_20262 +#### A masked pattern was here #### +POSTHOOK: query: explain select 1, r from HIVE_20262 + lateral view explode(a) t as r +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive_20262 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hive_20262 + 
Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col4 + Statistics: Num rows: 12 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1 (type: int), _col4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: a (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col4 + Statistics: Num rows: 12 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1 (type: int), _col4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select 
explode(array(1,2,3,4,5)) as col +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: explain select explode(array(1,2,3,4,5)) as col +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(1,2,3,4,5) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Select Operator + expressions: col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select explode(a) from HIVE_20262 +PREHOOK: type: QUERY +PREHOOK: Input: default@hive_20262 +#### A masked pattern was here #### +POSTHOOK: query: explain select explode(a) from HIVE_20262 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive_20262 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hive_20262 + Statistics: Num 
rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + function name: explode + Select Operator + expressions: col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_union.q.out b/ql/src/test/results/clientpositive/annotate_stats_union.q.out similarity index 57% rename from ql/src/test/results/clientpositive/llap/annotate_stats_union.q.out rename to ql/src/test/results/clientpositive/annotate_stats_union.q.out index fcbf0db8c5..92108f21b3 100644 --- a/ql/src/test/results/clientpositive/llap/annotate_stats_union.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -80,9 +80,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n3 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string) outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select * from (select state from loc_orc_n3 union all select state from loc_orc_n3) tmp @@ -99,51 +101,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 
(CONTAINS) - Map 3 <- Union 2 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n3 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: loc_orc_n3 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Union 2 - Vertex: Union 2 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n3 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: loc_orc_n3 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -169,9 +160,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select * from (select * from loc_orc_n3 union all select * from loc_orc_n3) tmp @@ -188,51 +181,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n3 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), 
locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: loc_orc_n3 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Union 2 - Vertex: Union 2 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 16 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1632 Basic stats: 
COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: loc_orc_n3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 16 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -350,51 +332,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n3 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP 
IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: loc_orc_n3 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Union 2 - Vertex: Union 2 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n3 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: loc_orc_n3 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE 
Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -418,51 +389,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_staging_n3 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: loc_orc_n3 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - Union 2 - Vertex: Union 2 + Map Reduce + Map Operator Tree: + TableScan + 
alias: loc_staging_n3 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: loc_orc_n3 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/ansi_sql_arithmetic.q.out b/ql/src/test/results/clientpositive/ansi_sql_arithmetic.q.out new file mode 100644 index 0000000000..4760e986e1 --- /dev/null +++ b/ql/src/test/results/clientpositive/ansi_sql_arithmetic.q.out @@ -0,0 +1,100 @@ +PREHOOK: query: explain select cast(key as int) / cast(key as int) from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select cast(key as int) / cast(key as int) from src 
limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (CAST( UDFToInteger(key) AS decimal(10,0)) / CAST( UDFToInteger(key) AS decimal(10,0))) (type: decimal(21,11)) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 56000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select cast(key as int) / cast(key as int) from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select cast(key as int) / cast(key as int) from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1.00000000000 +PREHOOK: query: explain select cast(key as int) / cast(key as int) from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select cast(key as int) / cast(key as int) from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + 
Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (UDFToDouble(UDFToInteger(key)) / UDFToDouble(UDFToInteger(key))) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select cast(key as int) / cast(key as int) from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select cast(key as int) / cast(key as int) from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1.0 diff --git a/ql/src/test/results/clientpositive/llap/array_map_access_nonconstant.q.out b/ql/src/test/results/clientpositive/array_map_access_nonconstant.q.out similarity index 91% rename from ql/src/test/results/clientpositive/llap/array_map_access_nonconstant.q.out rename to ql/src/test/results/clientpositive/array_map_access_nonconstant.q.out index 881f40a206..528e98ffe4 100644 --- a/ql/src/test/results/clientpositive/llap/array_map_access_nonconstant.q.out +++ b/ql/src/test/results/clientpositive/array_map_access_nonconstant.q.out @@ -36,9 +36,11 @@ STAGE PLANS: Processor Tree: TableScan alias: array_table + Statistics: Num rows: 4 Data size: 7696 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 
index (type: int), array[index] (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 7696 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select index, `array`[index] from array_table @@ -91,9 +93,11 @@ STAGE PLANS: Processor Tree: TableScan alias: map_table + Statistics: Num rows: 4 Data size: 3696 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), data[key] (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 3696 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select key, data[key] from map_table diff --git a/ql/src/test/results/clientpositive/array_size_estimation.q.out b/ql/src/test/results/clientpositive/array_size_estimation.q.out new file mode 100644 index 0000000000..00166721ef --- /dev/null +++ b/ql/src/test/results/clientpositive/array_size_estimation.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: create table t_n19 (col string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_n19 +POSTHOOK: query: create table t_n19 (col string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_n19 +PREHOOK: query: insert into t_n19 values ('x') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_n19 +POSTHOOK: query: insert into t_n19 values ('x') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_n19 +POSTHOOK: Lineage: t_n19.col SCRIPT [] +PREHOOK: query: explain +select array("b", "d", "c", "a") FROM t_n19 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_n19 +#### A masked pattern was here #### +POSTHOOK: query: explain +select array("b", "d", "c", "a") FROM t_n19 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_n19 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: 
Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t_n19 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array('b','d','c','a') (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select array("b", "d", "c", col) FROM t_n19 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_n19 +#### A masked pattern was here #### +POSTHOOK: query: explain +select array("b", "d", "c", col) FROM t_n19 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_n19 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t_n19 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array('b','d','c',col) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
+PREHOOK: query: explain +select sort_array(array("b", "d", "c", "a")),array("1","2") FROM t_n19 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_n19 +#### A masked pattern was here #### +POSTHOOK: query: explain +select sort_array(array("b", "d", "c", "a")),array("1","2") FROM t_n19 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_n19 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t_n19 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sort_array(array('b','d','c','a')) (type: array), array('1','2') (type: array) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select sort_array(array("b", "d", "c", col)),array("1","2") FROM t_n19 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_n19 +#### A masked pattern was here #### +POSTHOOK: query: explain +select sort_array(array("b", "d", "c", col)),array("1","2") FROM t_n19 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_n19 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t_n19 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 
sort_array(array('b','d','c',col)) (type: array), array('1','2') (type: array) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2328 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2328 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/authorization_9.q.out b/ql/src/test/results/clientpositive/authorization_9.q.out similarity index 100% rename from ql/src/test/results/clientpositive/llap/authorization_9.q.out rename to ql/src/test/results/clientpositive/authorization_9.q.out diff --git a/ql/src/test/results/clientpositive/llap/authorization_explain.q.out b/ql/src/test/results/clientpositive/authorization_explain.q.out similarity index 92% rename from ql/src/test/results/clientpositive/llap/authorization_explain.q.out rename to ql/src/test/results/clientpositive/authorization_explain.q.out index 542d2cd849..61551f4ecb 100644 --- a/ql/src/test/results/clientpositive/llap/authorization_explain.q.out +++ b/ql/src/test/results/clientpositive/authorization_explain.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain authorization select * from src join srcpart PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -30,7 +30,7 @@ CURRENT_USER: hive_test_user OPERATION: QUERY -Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[6][tables = 
[$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain formatted authorization select * from src join srcpart PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/llap/authorization_owner_actions_db.q.out b/ql/src/test/results/clientpositive/authorization_owner_actions_db.q.out similarity index 100% rename from ql/src/test/results/clientpositive/llap/authorization_owner_actions_db.q.out rename to ql/src/test/results/clientpositive/authorization_owner_actions_db.q.out diff --git a/ql/src/test/results/clientpositive/llap/authorization_view_1.q.out b/ql/src/test/results/clientpositive/authorization_view_1.q.out similarity index 99% rename from ql/src/test/results/clientpositive/llap/authorization_view_1.q.out rename to ql/src/test/results/clientpositive/authorization_view_1.q.out index b7fbf33b8e..eac3fdcedd 100644 --- a/ql/src/test/results/clientpositive/llap/authorization_view_1.q.out +++ b/ql/src/test/results/clientpositive/authorization_view_1.q.out @@ -222,15 +222,15 @@ POSTHOOK: Input: default@src_autho_test_n8 POSTHOOK: Input: default@v_n9 #### A masked pattern was here #### 238 +238 +86 86 311 +311 27 +27 +165 165 -409 -255 -278 -98 -484 PREHOOK: query: select key from (select value as key from v2_n7 union select value as key from v1_n13 union all select key from v_n9)subq limit 10 @@ -250,12 +250,12 @@ POSTHOOK: Input: default@v2_n7 POSTHOOK: Input: default@v_n9 #### A masked pattern was here #### val_0 +val_10 val_100 +val_103 val_104 +val_105 val_11 val_111 val_113 -val_116 -val_119 -val_125 -val_133 +val_114 diff --git a/ql/src/test/results/clientpositive/llap/authorization_view_disable_cbo_1.q.out b/ql/src/test/results/clientpositive/authorization_view_disable_cbo_1.q.out similarity index 99% rename from ql/src/test/results/clientpositive/llap/authorization_view_disable_cbo_1.q.out rename to ql/src/test/results/clientpositive/authorization_view_disable_cbo_1.q.out index 
32517d7d71..0b50d93acf 100644 --- a/ql/src/test/results/clientpositive/llap/authorization_view_disable_cbo_1.q.out +++ b/ql/src/test/results/clientpositive/authorization_view_disable_cbo_1.q.out @@ -222,15 +222,15 @@ POSTHOOK: Input: default@src_autho_test_n9 POSTHOOK: Input: default@v_n10 #### A masked pattern was here #### 238 +238 +86 86 311 +311 27 +27 +165 165 -409 -255 -278 -98 -484 PREHOOK: query: select key from (select value as key from v2_n8 union select value as key from v1_n14 union all select key from v_n10)subq limit 10 @@ -250,15 +250,15 @@ POSTHOOK: Input: default@v2_n8 POSTHOOK: Input: default@v_n10 #### A masked pattern was here #### val_0 +val_10 val_100 +val_103 val_104 +val_105 val_11 val_111 val_113 -val_116 -val_119 -val_125 -val_133 +val_114 PREHOOK: query: select key from v_n10 cluster by key limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@src_autho_test_n9 @@ -293,13 +293,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src_autho_test_n9 POSTHOOK: Input: default@v_n10 #### A masked pattern was here #### +0 +0 +0 +0 +0 +0 10 10 100 100 -100 -100 -103 -103 -103 -103 diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_11.q.out b/ql/src/test/results/clientpositive/autoColumnStats_11.q.out similarity index 66% rename from ql/src/test/results/clientpositive/llap/autoColumnStats_11.q.out rename to ql/src/test/results/clientpositive/autoColumnStats_11.q.out index 8a05edf911..c7a97865e8 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_11.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_11.q.out @@ -16,78 +16,78 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@acs_t11 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + 
Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(const struct(1,1,'2011-11-11')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Select Operator + expressions: col1 (type: int), col2 (type: int), CAST( col3 AS DATE) (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.acs_t11 Select Operator - expressions: array(const struct(1,1,'2011-11-11')) (type: array>) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Select Operator - expressions: col1 (type: int), col2 (type: int), CAST( col3 AS DATE) (type: date) - outputColumnNames: _col0, _col1, _col2 - Statistics: 
Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.acs_t11 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: date) - outputColumnNames: a, b, d - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(d, 'hll') - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: date) + outputColumnNames: a, b, d + Statistics: Num rows: 1 Data size: 56 Basic 
stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(d, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### Stage: Stage-0 Move Operator @@ -99,7 +99,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.acs_t11 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -107,6 +107,36 @@ STAGE PLANS: Column Types: int, int, date Table: default.acs_t11 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.acs_t11 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.acs_t11 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + PREHOOK: query: insert into acs_t11 values(1,1,'2011-11-11') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -269,55 +299,45 @@ POSTHOOK: Output: default@acs_t11 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: acs_t11 - Statistics: Num rows: 5 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int), b (type: int), d (type: date) - outputColumnNames: a, b, d - Statistics: Num rows: 5 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(d, 'hll') - minReductionHashAggr: 0.8 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: acs_t11 + Statistics: Num 
rows: 5 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int), b (type: int), d (type: date) + outputColumnNames: a, b, d + Statistics: Num rows: 5 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(d, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Stats Work Basic 
Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_4.q.out b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out similarity index 57% rename from ql/src/test/results/clientpositive/llap/autoColumnStats_4.q.out rename to ql/src/test/results/clientpositive/autoColumnStats_4.q.out index 5e34ef39cb..a9b5ad0c48 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_4.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out @@ -55,112 +55,89 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - filterExpr: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 9173 Data size: 671202 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: cint (type: int) - null sort order: z - Statistics: Num rows: 9173 Data size: 671202 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Select Operator - expressions: cint (type: int), CAST( cstring1 AS varchar(128)) (type: varchar(128)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9173 Data size: 977184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 9173 Data size: 977184 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory 
Usage: 0.1 - value expressions: _col1 (type: varchar(128)) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(128)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9173 Data size: 977184 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: varchar(128)) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + filterExpr: cint is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 9173 Data size: 671202 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(128)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid_dtt - Write Type: INSERT - Select Operator - expressions: _col0 (type: int), _col1 (type: varchar(128)) - outputColumnNames: a, b - Statistics: Num rows: 10 Data size: 1296 Basic stats: COMPLETE Column 
stats: COMPLETE - Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') - minReductionHashAggr: 0.9 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial + expressions: cint (type: int), CAST( cstring1 AS varchar(128)) (type: varchar(128)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 9173 Data size: 1479384 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 9173 Data size: 1479384 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: varchar(128)) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(128)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9173 Data size: 1479384 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 - Dependency Collection + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: varchar(128)) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(128)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acid_dtt + Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: varchar(128)) + outputColumnNames: a, b + Statistics: Num rows: 10 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -181,6 +158,30 @@ STAGE PLANS: Column Types: int, 
varchar(128) Table: default.acid_dtt + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert into table acid_dtt select cint, cast(cstring1 as varchar(128)) from alltypesorc where cint is not null order by cint limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc @@ -213,7 +214,7 @@ Table Parameters: numFiles 2 numRows 10 rawDataSize 0 - totalSize 1884 + totalSize 1903 transactional true transactional_properties default #### A masked pattern was here #### @@ -255,10 +256,10 @@ Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 - numFiles 3 + numFiles 4 numRows 8 rawDataSize 0 - totalSize 2596 + totalSize 3293 transactional true transactional_properties default #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_5.q.out b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out similarity index 52% rename from ql/src/test/results/clientpositive/llap/autoColumnStats_5.q.out rename to ql/src/test/results/clientpositive/autoColumnStats_5.q.out index 6d9150eb82..01793458a5 100644 --- 
a/ql/src/test/results/clientpositive/llap/autoColumnStats_5.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out @@ -17,86 +17,86 @@ POSTHOOK: Output: default@partitioned1_n1@part=1 Explain STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array(const struct(1,'original'),const struct(2,'original'),const struct(3,'original'),const struct(4,'original')) (type: array>) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Select Operator - expressions: col1 (type: int), col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.partitioned1_n1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), UDFToInteger('1') (type: int) - 
outputColumnNames: a, b, part - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') - keys: part (type: int) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(const struct(1,'original'),const struct(2,'original'),const struct(3,'original'),const struct(4,'original')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + expressions: col1 (type: int), col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1_n1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), UDFToInteger('1') (type: int) + outputColumnNames: a, b, part + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + keys: part (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data 
size: 884 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### Stage: Stage-0 Move Operator @@ -110,7 +110,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.partitioned1_n1 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -118,6 +118,36 @@ STAGE PLANS: Column Types: int, string Table: default.partitioned1_n1 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1_n1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1_n1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + PREHOOK: query: insert into table partitioned1_n1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -243,86 +273,86 @@ POSTHOOK: Output: default@partitioned1_n1@part=2 Explain STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array(const struct(1,'new',10,'ten'),const struct(2,'new',20,'twenty'),const struct(3,'new',30,'thirty'),const struct(4,'new',40,'forty')) (type: array>) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Select Operator - expressions: col1 (type: int), col2 (type: string), col3 (type: int), col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.partitioned1_n1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), UDFToInteger('2') (type: int) - outputColumnNames: a, b, c, d, part - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 
'hll') - keys: part (type: int) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(const struct(1,'new',10,'ten'),const struct(2,'new',20,'twenty'),const struct(3,'new',30,'thirty'),const struct(4,'new',40,'forty')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + expressions: col1 (type: int), col2 (type: string), col3 (type: int), col4 (type: string) + outputColumnNames: 
_col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1_n1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), UDFToInteger('2') (type: int) + outputColumnNames: a, b, c, d, part + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll') + keys: part (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column 
stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### Stage: Stage-0 Move Operator @@ -336,7 +366,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.partitioned1_n1 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -344,6 +374,36 @@ STAGE PLANS: Column Types: int, string, int, string Table: default.partitioned1_n1 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1_n1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1_n1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + PREHOOK: query: insert into table partitioned1_n1 
partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -427,86 +487,86 @@ POSTHOOK: Output: default@partitioned1_n1@part=1 Explain STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array(const struct(5,'new',100,'hundred'),const struct(6,'new',200,'two hundred')) (type: array>) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Select Operator - expressions: col1 (type: int), col2 (type: string), col3 (type: int), col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.partitioned1_n1 - Select Operator - expressions: 
_col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), UDFToInteger('1') (type: int) - outputColumnNames: a, b, c, d, part - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll') - keys: part (type: int) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(const struct(5,'new',100,'hundred'),const struct(6,'new',200,'two hundred')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: 
struct), _col4 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + expressions: col1 (type: int), col2 (type: string), col3 (type: int), col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1_n1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), UDFToInteger('1') (type: int) + outputColumnNames: a, b, c, d, part + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll') + keys: part (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 1732 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1764 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### Stage: Stage-0 Move Operator @@ -520,7 +580,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.partitioned1_n1 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -528,6 +588,36 @@ STAGE PLANS: Column Types: int, string, int, string Table: default.partitioned1_n1 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1_n1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1_n1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + PREHOOK: query: insert into table partitioned1_n1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table diff --git a/ql/src/test/results/clientpositive/autoColumnStats_5a.q.out b/ql/src/test/results/clientpositive/autoColumnStats_5a.q.out new file mode 100644 index 0000000000..4bc9df4b80 --- /dev/null +++ b/ql/src/test/results/clientpositive/autoColumnStats_5a.q.out @@ -0,0 +1,1062 @@ +PREHOOK: query: CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: explain extended +insert into table partitioned1 partition(part=1) values(1, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: explain extended +insert into table partitioned1 partition(part=1) values(1, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@partitioned1@part=1 +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row 
Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: array(const struct(1,'original')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Select Operator + expressions: col1 (type: int), col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: part=1/ + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), UDFToInteger('1') (type: int) + outputColumnNames: a, b, part + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + keys: part (type: int) + minReductionHashAggr: 0.99 + 
mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dummy_path + input format: org.apache.hadoop.hive.ql.io.NullRowsInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns + columns.comments + columns.types +#### A masked pattern was here #### + name _dummy_database._dummy_table + serialization.ddl struct _dummy_table { } + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.hive.ql.io.NullRowsInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns + columns.comments + columns.types +#### A masked pattern was here #### + name _dummy_database._dummy_table + serialization.ddl struct _dummy_table { } + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + name: _dummy_database._dummy_table + name: _dummy_database._dummy_table + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: int) + mode: 
mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-2 + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: a, b + Column Types: int, string + Table: default.partitioned1 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns part + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + name: default.partitioned1 + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns part + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + name: default.partitioned1 + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a SCRIPT [] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SCRIPT [] +col1 col2 +PREHOOK: query: desc formatted partitioned1 
partition(part=1) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partitioned1 +POSTHOOK: query: desc formatted partitioned1 partition(part=1) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partitioned1 +col_name data_type comment +# col_name data_type comment +a int +b string + +# Partition Information +# col_name data_type comment +part int + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: partitioned1 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} + numFiles 1 + numRows 1 + rawDataSize 10 + totalSize 11 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended +insert into table partitioned1 partition(part=1) values(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: explain extended +insert into table partitioned1 partition(part=1) values(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@partitioned1@part=1 +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 
Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: array(const struct(2,'original'),const struct(3,'original'),const struct(4,'original')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Select Operator + expressions: col1 (type: int), col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: part=1/ + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), UDFToInteger('1') (type: int) + outputColumnNames: a, b, part + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + keys: part (type: int) + 
minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dummy_path + input format: org.apache.hadoop.hive.ql.io.NullRowsInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns + columns.comments + columns.types +#### A masked pattern was here #### + name _dummy_database._dummy_table + serialization.ddl struct _dummy_table { } + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.hive.ql.io.NullRowsInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns + columns.comments + columns.types +#### A masked pattern was here #### + name _dummy_database._dummy_table + serialization.ddl struct _dummy_table { } + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + name: _dummy_database._dummy_table + name: _dummy_database._dummy_table + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: 
KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A 
masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-2 + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: a, b + Column Types: int, string + Table: default.partitioned1 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns part + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### 
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + name: default.partitioned1 + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns part + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10002 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns a,b + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.partitioned1 + partition_columns.types int + serialization.ddl struct partitioned1 { i32 a, string b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + name: default.partitioned1 + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert into table partitioned1 partition(part=1) values(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a SCRIPT [] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SCRIPT [] 
+col1 col2 +PREHOOK: query: explain insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: explain insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@partitioned1@part=1 +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(const struct(1,'original'),const struct(2,'original'),const struct(3,'original'),const struct(4,'original')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Select Operator + expressions: col1 (type: int), col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + Select 
Operator + expressions: _col0 (type: int), _col1 (type: string), UDFToInteger('1') (type: int) + outputColumnNames: a, b, part + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + keys: part (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, string + Table: default.partitioned1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: desc formatted partitioned1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partitioned1 +POSTHOOK: query: desc formatted partitioned1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partitioned1 +col_name data_type comment +# col_name data_type comment +a int +b string + +# Partition Information +# col_name data_type comment +part int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + bucketing_version 2 + numFiles 2 + numPartitions 1 + numRows 4 + rawDataSize 40 + totalSize 44 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted partitioned1 partition(part=1) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partitioned1 +POSTHOOK: query: desc formatted partitioned1 partition(part=1) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partitioned1 +col_name data_type comment +# col_name data_type comment +a int +b string + +# Partition Information +# col_name data_type comment +part int + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: partitioned1 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} + numFiles 2 + numRows 4 + rawDataSize 40 + totalSize 44 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted partitioned1 partition(part=1) a +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partitioned1 +POSTHOOK: query: desc formatted partitioned1 partition(part=1) a +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partitioned1 +column_property value +col_name a +data_type int +min 1 +max 4 +num_nulls 0 +distinct_count 4 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment from deserializer diff --git a/ql/src/test/results/clientpositive/autoColumnStats_7.q.out b/ql/src/test/results/clientpositive/autoColumnStats_7.q.out new file mode 100644 index 0000000000..902a48fb4e --- /dev/null +++ b/ql/src/test/results/clientpositive/autoColumnStats_7.q.out @@ -0,0 +1,223 @@ 
+PREHOOK: query: CREATE TABLE dest_g2_n5(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g2_n5 +POSTHOOK: query: CREATE TABLE dest_g2_n5(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g2_n5 +PREHOOK: query: CREATE TEMPORARY TABLE src_temp AS SELECT * FROM src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@src_temp +POSTHOOK: query: CREATE TEMPORARY TABLE src_temp AS SELECT * FROM src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_temp +PREHOOK: query: explain FROM src_temp +INSERT OVERWRITE TABLE dest_g2_n5 SELECT substr(src_temp.key,1,1), count(DISTINCT substr(src_temp.value,5)), concat(substr(src_temp.key,1,1),sum(substr(src_temp.value,5))) GROUP BY substr(src_temp.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src_temp +PREHOOK: Output: default@dest_g2_n5 +POSTHOOK: query: explain FROM src_temp +INSERT OVERWRITE TABLE dest_g2_n5 SELECT substr(src_temp.key,1,1), count(DISTINCT substr(src_temp.value,5)), concat(substr(src_temp.key,1,1),sum(substr(src_temp.value,5))) GROUP BY substr(src_temp.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_temp +POSTHOOK: Output: default@dest_g2_n5 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src_temp + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(key, 1, 1) (type: string), substr(value, 5) 
(type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 87584 
Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2_n5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2_n5 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2_n5 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src_temp +INSERT OVERWRITE TABLE dest_g2_n5 SELECT substr(src_temp.key,1,1), count(DISTINCT substr(src_temp.value,5)), concat(substr(src_temp.key,1,1),sum(substr(src_temp.value,5))) GROUP BY substr(src_temp.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src_temp +PREHOOK: Output: default@dest_g2_n5 +POSTHOOK: query: FROM src_temp +INSERT OVERWRITE TABLE dest_g2_n5 SELECT substr(src_temp.key,1,1), count(DISTINCT substr(src_temp.value,5)), concat(substr(src_temp.key,1,1),sum(substr(src_temp.value,5))) GROUP BY substr(src_temp.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_temp +POSTHOOK: Output: default@dest_g2_n5 +POSTHOOK: Lineage: dest_g2_n5.c1 EXPRESSION [(src_temp)src_temp.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest_g2_n5.c2 EXPRESSION [(src_temp)src_temp.FieldSchema(name:key, type:string, comment:null), 
(src_temp)src_temp.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest_g2_n5.key EXPRESSION [(src_temp)src_temp.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: SELECT dest_g2_n5.* FROM dest_g2_n5 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g2_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest_g2_n5.* FROM dest_g2_n5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g2_n5 +#### A masked pattern was here #### +0 1 00.0 +1 71 116414.0 +2 69 225571.0 +3 62 332004.0 +4 74 452763.0 +5 6 5397.0 +6 5 6398.0 +7 6 7735.0 +8 8 8762.0 +9 7 91047.0 +PREHOOK: query: DROP TABLE dest_g2_n5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest_g2_n5 +PREHOOK: Output: default@dest_g2_n5 +POSTHOOK: query: DROP TABLE dest_g2_n5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest_g2_n5 +POSTHOOK: Output: default@dest_g2_n5 +PREHOOK: query: DROP TABLE src_temp +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_temp +PREHOOK: Output: default@src_temp +POSTHOOK: query: DROP TABLE src_temp +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_temp +POSTHOOK: Output: default@src_temp diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_8.q.out b/ql/src/test/results/clientpositive/autoColumnStats_8.q.out similarity index 74% rename from ql/src/test/results/clientpositive/llap/autoColumnStats_8.q.out rename to ql/src/test/results/clientpositive/autoColumnStats_8.q.out index b95c16a2cd..de7352c36d 100644 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_8.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_8.q.out @@ -57,425 +57,384 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: 
Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1, Stage-5 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (ds <= '2008-04-08') (type: boolean) - Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (ds <= '2008-04-08') (type: boolean) + Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + NumFilesPerFileSink: 1 + Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: key, value, ds, hr - Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string), hr (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: 
COMPLETE - tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) - auto parallelism: true - Filter Operator - isSamplingPred: false - predicate: (ds > '2008-04-08') (type: boolean) - Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 2 + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-12-31/ - Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 2496 Basic 
stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: (ds > '2008-04-08') (type: boolean) + Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 2 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-12-31/ + Statistics: Num rows: 666 Data size: 241092 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: 
string) - outputColumnNames: key, value, hr - Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: '2008-12-31' (type: string), hr (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: '2008-12-31' (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - Path -> Partition: + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 #### 
A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: #### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + Path -> Partition: #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + 
properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 
#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - 
column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - /srcpart/ds=2008-04-09/hr=11 [srcpart] - /srcpart/ds=2008-04-09/hr=12 [srcpart] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + name default.srcpart + partition_columns ds/hr + partition_columns.types 
string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Reducer 3 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: '2008-12-31' (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=11 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: 
mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-3 - Dependency Collection + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -506,7 +465,7 @@ STAGE PLANS: 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part8 - Stage: Stage-4 + Stage: Stage-3 Stats Work Basic Stats Work: #### A masked pattern was here #### @@ -540,7 +499,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part8 - Stage: Stage-5 + Stage: Stage-4 Stats Work Basic Stats Work: #### A masked pattern was here #### @@ -550,6 +509,83 @@ STAGE PLANS: Table: default.nzhang_part8 Is Table Level Stats: false + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 
Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: from srcpart insert overwrite table nzhang_part8 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' insert overwrite table nzhang_part8 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' diff --git a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out new file mode 100644 index 0000000000..0d3eb2cf7c --- /dev/null +++ 
b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out @@ -0,0 +1,302 @@ +PREHOOK: query: CREATE TABLE dest_j1_n23(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1_n23 +POSTHOOK: query: CREATE TABLE dest_j1_n23(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1_n23 +PREHOOK: query: EXPLAIN +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1_n23 SELECT src1.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_j1_n23 +POSTHOOK: query: EXPLAIN +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1_n23 SELECT src1.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_j1_n23 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-6 depends on stages: Stage-1 , consists of Stage-7, Stage-0, Stage-3 + Stage-7 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: 
COMPLETE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + handleSkewJoin: true + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n23 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Conditional Operator + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + 1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + 1 + TableScan + HashTable Sink Operator + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + outputColumnNames: _col0, _col2 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n23 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n23 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1_n23 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1_n23 SELECT src1.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_j1_n23 +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1_n23 SELECT src1.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: 
default@dest_j1_n23 +POSTHOOK: Lineage: dest_j1_n23.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1_n23.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select 'cnt, check desc',count(*) from dest_j1_n23 group by key*key >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1_n23 +#### A masked pattern was here #### +POSTHOOK: query: select 'cnt, check desc',count(*) from dest_j1_n23 group by key*key >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1_n23 +#### A masked pattern was here #### +cnt, check desc 1028 +PREHOOK: query: desc formatted dest_j1_n23 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@dest_j1_n23 +POSTHOOK: query: desc formatted dest_j1_n23 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@dest_j1_n23 +# col_name data_type comment +key int +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + bucketing_version 2 + numFiles 137 + numRows 1028 + rawDataSize 10968 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted dest_j1_n23 key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@dest_j1_n23 +POSTHOOK: query: desc formatted dest_j1_n23 key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@dest_j1_n23 +col_name key +data_type int +min 0 +max 498 +num_nulls 0 +distinct_count 303 +avg_col_len +max_col_len 
+num_trues +num_falses +bit_vector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} +PREHOOK: query: desc formatted dest_j1_n23 value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@dest_j1_n23 +POSTHOOK: query: desc formatted dest_j1_n23 value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@dest_j1_n23 +col_name value +data_type string +min +max +num_nulls 0 +distinct_count 307 +avg_col_len 6.834630350194552 +max_col_len 7 +num_trues +num_falses +bit_vector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/auto_join10.q.out b/ql/src/test/results/clientpositive/auto_join10.q.out new file mode 100644 index 0000000000..3f750166ef --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join10.q.out @@ -0,0 +1,129 @@ +PREHOOK: query: explain +FROM +(SELECT src.* FROM src) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +FROM +(SELECT src.* FROM src) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_0:src + TableScan + alias: src + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 
43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col1,_col2) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT src.* FROM src) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT src.* FROM src) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +103231310608 diff --git a/ql/src/test/results/clientpositive/auto_join11.q.out b/ql/src/test/results/clientpositive/auto_join11.q.out new file mode 100644 index 0000000000..6cdfa34d5e --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join11.q.out @@ -0,0 +1,129 @@ +PREHOOK: query: explain +SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:src + Fetch Operator + limit: -1 + Alias -> Map Local 
Operator Tree: + $hdt$_0:$hdt$_0:src + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col2) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: 
_col0 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +-101333194320 diff --git a/ql/src/test/results/clientpositive/auto_join12.q.out b/ql/src/test/results/clientpositive/auto_join12.q.out new file mode 100644 index 0000000000..60a8f4732f --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join12.q.out @@ -0,0 +1,168 @@ +PREHOOK: query: explain +SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 = src3.c5 AND src3.c5 < 80 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was 
here #### +POSTHOOK: query: explain +SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 = src3.c5 AND src3.c5 < 80 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:src + Fetch Operator + limit: -1 + $hdt$_0:$hdt$_1:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_0:src + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 80.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 80.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_0:$hdt$_1:src + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 80.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 80.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 
_col0 (type: string) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 80.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 80.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 = src3.c5 AND src3.c5 < 80 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 = src3.c5 AND src3.c5 < 80 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +-136843922952 diff --git a/ql/src/test/results/clientpositive/auto_join13.q.out b/ql/src/test/results/clientpositive/auto_join13.q.out new file mode 100644 index 0000000000..5f83b393fe --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join13.q.out @@ -0,0 +1,168 @@ +PREHOOK: query: explain +SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) 
src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:src + Fetch Operator + limit: -1 + $hdt$_0:$hdt$_1:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_0:src + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 200.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 200.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToDouble(key) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + 1 _col0 (type: double) + $hdt$_0:$hdt$_1:src + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: 
(UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 43990 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + 1 _col0 (type: double) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col2,_col1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +-97670109576 diff --git a/ql/src/test/results/clientpositive/auto_join14.q.out b/ql/src/test/results/clientpositive/auto_join14.q.out new file mode 100644 index 0000000000..238befeb5c --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join14.q.out @@ -0,0 +1,180 @@ +PREHOOK: query: CREATE TABLE dest1_n83(c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n83 +POSTHOOK: query: CREATE TABLE dest1_n83(c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n83 +PREHOOK: query: explain +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1_n83 SELECT src.key, srcpart.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart 
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1_n83 +POSTHOOK: query: explain +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1_n83 SELECT src.key, srcpart.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1_n83 +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src + TableScan + alias: src + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + filterExpr: ((UDFToDouble(key) > 100.0D) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key 
(type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 174 Data size: 30972 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 174 Data size: 16530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 174 Data size: 16530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n83 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 174 Data size: 16530 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n83 + 
+ Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1_n83 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1_n83 SELECT src.key, srcpart.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1_n83 +POSTHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1_n83 SELECT src.key, srcpart.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1_n83 +POSTHOOK: Lineage: dest1_n83.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n83.c2 
SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1_n83.c1,dest1_n83.c2)) FROM dest1_n83 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n83 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1_n83.c1,dest1_n83.c2)) FROM dest1_n83 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n83 +#### A masked pattern was here #### +404554174174 diff --git a/ql/src/test/results/clientpositive/auto_join15.q.out b/ql/src/test/results/clientpositive/auto_join15.q.out new file mode 100644 index 0000000000..15fa1d263d --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join15.q.out @@ -0,0 +1,129 @@ +PREHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +SORT BY k1, v1, k2, v2 +) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +SORT BY k1, v1, k2, v2 +) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_0:src1 + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + 
expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +SORT BY k1, v1, k2, v2 +) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +SORT BY k1, v1, k2, v2 +) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +524272996896 diff --git a/ql/src/test/results/clientpositive/auto_join16.q.out b/ql/src/test/results/clientpositive/auto_join16.q.out new file mode 100644 index 0000000000..f13105665c --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join16.q.out @@ -0,0 +1,129 @@ +PREHOOK: query: explain +SELECT sum(hash(subq.key, tab.value)) +FROM +(select a.key, a.value from src a where a.key > 10 ) subq +JOIN src tab +ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) +where tab.value < 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT sum(hash(subq.key, tab.value)) +FROM +(select a.key, a.value from src a where a.key > 10 ) subq +JOIN src tab +ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) +where tab.value < 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 
depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_0:a + TableScan + alias: a + filterExpr: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(value) < 200.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(value) < 200.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: tab + filterExpr: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(value) < 200.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(value) < 200.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 55 
Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(subq.key, tab.value)) +FROM +(select a.key, a.value from src a where a.key > 10 ) subq +JOIN src tab +ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) +where tab.value < 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(subq.key, tab.value)) +FROM +(select a.key, a.value from src a where a.key > 10 ) subq +JOIN src tab +ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) +where tab.value < 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL diff --git a/ql/src/test/results/clientpositive/auto_join17.q.out b/ql/src/test/results/clientpositive/auto_join17.q.out new file mode 100644 index 0000000000..88185e790c --- /dev/null +++ 
b/ql/src/test/results/clientpositive/auto_join17.q.out @@ -0,0 +1,170 @@ +PREHOOK: query: CREATE TABLE dest1_n41(key1 INT, value1 STRING, key2 INT, value2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n41 +POSTHOOK: query: CREATE TABLE dest1_n41(key1 INT, value1 STRING, key2 INT, value2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n41 +PREHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n41 SELECT src1.*, src2.* +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n41 +POSTHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n41 SELECT src1.*, src2.* +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n41 +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src1 + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: 
src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n41 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + 
compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n41 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1_n41 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n41 SELECT src1.*, src2.* +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n41 +POSTHOOK: query: FROM src src1 
JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n41 SELECT src1.*, src2.* +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n41 +POSTHOOK: Lineage: dest1_n41.key1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n41.key2 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n41.value1 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n41.value2 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1_n41.key1,dest1_n41.value1,dest1_n41.key2,dest1_n41.value2)) FROM dest1_n41 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n41 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1_n41.key1,dest1_n41.value1,dest1_n41.key2,dest1_n41.value2)) FROM dest1_n41 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n41 +#### A masked pattern was here #### +-793937029770 diff --git a/ql/src/test/results/clientpositive/auto_join18.q.out b/ql/src/test/results/clientpositive/auto_join18.q.out new file mode 100644 index 0000000000..0473494c2a --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join18.q.out @@ -0,0 +1,223 @@ +PREHOOK: query: explain + SELECT sum(hash(a.key, a.value, b.key, b.value)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain + SELECT sum(hash(a.key, a.value, b.key, b.value)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, 
count(distinct(src2.value)) AS value + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(value) + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + 
null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 274 Data size: 2457 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 274 Data size: 2457 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 
Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT value) + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key, a.value, b.key, b.value)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( 
+ SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key, a.value, b.key, b.value)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +379685492277 diff --git a/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out new file mode 100644 index 0000000000..961a320982 --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out @@ -0,0 +1,227 @@ +PREHOOK: query: explain + SELECT sum(hash(a.key, a.value, b.key, b.value1, b.value2)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain + SELECT sum(hash(a.key, a.value, b.key, b.value1, b.value2)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(value) + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + 
TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1224 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 274 Data size: 2561 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3,_col4) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 274 Data size: 2561 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT value), count(DISTINCT key) + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 2292 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 2292 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 1224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key, a.value, b.key, b.value1, b.value2)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + 
count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key, a.value, b.key, b.value1, b.value2)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +40694575227 diff --git a/ql/src/test/results/clientpositive/auto_join19.q.out b/ql/src/test/results/clientpositive/auto_join19.q.out new file mode 100644 index 0000000000..cad3e0971e --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join19.q.out @@ -0,0 +1,192 @@ +PREHOOK: query: CREATE TABLE dest1_n18(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n18 +POSTHOOK: query: CREATE TABLE dest1_n18(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n18 +PREHOOK: query: explain +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n18 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@dest1_n18 +POSTHOOK: query: explain +FROM srcpart src1 JOIN src src2 ON (src1.key = 
src2.key) +INSERT OVERWRITE TABLE dest1_n18 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@dest1_n18 +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:src2 + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: ((ds) IN ('2008-04-08', '2008-04-09') and (hr) IN ('12', '11') and key is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + 
outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3164 Data size: 563192 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n18 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n18 + + Stage: Stage-2 + Stats 
Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1_n18 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n18 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@dest1_n18 +POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n18 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@dest1_n18 +POSTHOOK: Lineage: dest1_n18.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n18.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1_n18.key,dest1_n18.value)) FROM dest1_n18 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n18 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1_n18.key,dest1_n18.value)) FROM dest1_n18 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n18 +#### A masked pattern was here #### +407444119660 diff --git a/ql/src/test/results/clientpositive/auto_join19_inclause.q.out b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out new file mode 100644 index 0000000000..f42743a39f --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out @@ -0,0 +1,192 @@ +PREHOOK: query: CREATE TABLE dest1_n11(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n11 +POSTHOOK: query: CREATE TABLE dest1_n11(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n11 +PREHOOK: query: explain +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n11 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: 
default@dest1_n11 +POSTHOOK: query: explain +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n11 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@dest1_n11 +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:src2 + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: ((ds) IN ('2008-04-08', '2008-04-09') and (hr) IN ('12', '11') and key is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE 
Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3164 Data size: 563192 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n11 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n11 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1_n11 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n11 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@dest1_n11 +POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n11 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: 
default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@dest1_n11 +POSTHOOK: Lineage: dest1_n11.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n11.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1_n11.key,dest1_n11.value)) FROM dest1_n11 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n11 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1_n11.key,dest1_n11.value)) FROM dest1_n11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n11 +#### A masked pattern was here #### +407444119660 diff --git a/ql/src/test/results/clientpositive/auto_join2.q.out b/ql/src/test/results/clientpositive/auto_join2.q.out new file mode 100644 index 0000000000..c694679e6f --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join2.q.out @@ -0,0 +1,195 @@ +PREHOOK: query: CREATE TABLE dest_j2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j2 +POSTHOOK: query: CREATE TABLE dest_j2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j2 +PREHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) +INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_j2 +POSTHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) +INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src 
+POSTHOOK: Output: default@dest_j2 +STAGE DEPENDENCIES: + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src1 + Fetch Operator + limit: -1 + $hdt$_2:src3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src1 + TableScan + alias: src1 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_2:src3 + TableScan + alias: src3 + filterExpr: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 (_col1 + _col3) (type: double) + 1 _col1 (type: double) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data 
size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 791 Data size: 81473 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + _col3) (type: double) + 1 _col1 (type: double) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + 
compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) +INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_j2 +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) +INSERT 
OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_j2 +POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +33815990627 diff --git a/ql/src/test/results/clientpositive/auto_join20.q.out b/ql/src/test/results/clientpositive/auto_join20.q.out new file mode 100644 index 0000000000..6475ad2ee0 --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join20.q.out @@ -0,0 +1,502 @@ +PREHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-10 is a root stage + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-8 , 
consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:src1 + TableScan + alias: src1 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work 
+ + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 
0 + 1 {VALUE._col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +56157587016 +PREHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, 
src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-10 is a root stage + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:src1 + TableScan + alias: src1 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-8 + Map Reduce + Map 
Operator Tree: + TableScan + alias: src2 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + 
keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num 
rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {VALUE._col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT 
src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +56157587016 diff --git a/ql/src/test/results/clientpositive/auto_join22.q.out b/ql/src/test/results/clientpositive/auto_join22.q.out new file mode 100644 index 0000000000..5a98716fed --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join22.q.out @@ -0,0 +1,136 @@ +PREHOOK: query: explain +SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 
depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:$hdt$_0:src1 + Fetch Operator + limit: -1 + $hdt$_0:$hdt$_1:src4 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_0:$hdt$_0:src1 + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_0:$hdt$_1:src4 + TableScan + alias: src4 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 177500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE 
Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1251 Data size: 113841 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1251 Data size: 113841 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = 
src2.key) src3 ON src3.src1_key = src4.key) src5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +344337359100 diff --git a/ql/src/test/results/clientpositive/auto_join23.q.out b/ql/src/test/results/clientpositive/auto_join23.q.out new file mode 100644 index 0000000000..e91161ad5c --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join23.q.out @@ -0,0 +1,199 @@ +Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src1 + TableScan + alias: src1 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: 
COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + null sort order: zzzz + sort order: ++++ + Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +PREHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 2 val_2 +0 val_0 2 val_2 +0 val_0 2 val_2 +0 val_0 4 val_4 +0 val_0 4 val_4 +0 val_0 4 val_4 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 8 val_8 +0 val_0 8 val_8 +0 val_0 8 val_8 +0 val_0 9 val_9 +0 val_0 9 val_9 +0 val_0 9 val_9 +2 val_2 0 val_0 +2 val_2 0 val_0 +2 val_2 0 val_0 +2 val_2 2 val_2 +2 val_2 4 val_4 +2 val_2 5 val_5 +2 val_2 5 val_5 +2 val_2 5 val_5 +2 val_2 8 val_8 +2 val_2 9 val_9 +4 val_4 0 val_0 +4 val_4 0 val_0 +4 val_4 0 val_0 +4 val_4 2 val_2 +4 val_4 4 val_4 +4 val_4 5 val_5 +4 val_4 5 val_5 +4 val_4 5 val_5 +4 val_4 8 val_8 +4 val_4 9 val_9 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 2 val_2 +5 val_5 2 val_2 +5 val_5 2 val_2 +5 val_5 4 val_4 +5 val_5 4 val_4 +5 val_5 4 val_4 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 
5 val_5 +5 val_5 8 val_8 +5 val_5 8 val_8 +5 val_5 8 val_8 +5 val_5 9 val_9 +5 val_5 9 val_9 +5 val_5 9 val_9 +8 val_8 0 val_0 +8 val_8 0 val_0 +8 val_8 0 val_0 +8 val_8 2 val_2 +8 val_8 4 val_4 +8 val_8 5 val_5 +8 val_8 5 val_5 +8 val_8 5 val_5 +8 val_8 8 val_8 +8 val_8 9 val_9 +9 val_9 0 val_0 +9 val_9 0 val_0 +9 val_9 0 val_0 +9 val_9 2 val_2 +9 val_9 4 val_4 +9 val_9 5 val_5 +9 val_9 5 val_5 +9 val_9 5 val_5 +9 val_9 8 val_8 +9 val_9 9 val_9 diff --git a/ql/src/test/results/clientpositive/auto_join24.q.out b/ql/src/test/results/clientpositive/auto_join24.q.out new file mode 100644 index 0000000000..56168c3c48 --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join24.q.out @@ -0,0 +1,125 @@ +PREHOOK: query: create table tst1_n2(key STRING, cnt INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tst1_n2 +POSTHOOK: query: create table tst1_n2(key STRING, cnt INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tst1_n2 +PREHOOK: query: INSERT OVERWRITE TABLE tst1_n2 +SELECT a.key, count(1) FROM src a group by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tst1_n2 +POSTHOOK: query: INSERT OVERWRITE TABLE tst1_n2 +SELECT a.key, count(1) FROM src a group by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tst1_n2 +POSTHOOK: Lineage: tst1_n2.cnt EXPRESSION [(src)a.null, ] +POSTHOOK: Lineage: tst1_n2.key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: explain +SELECT sum(a.cnt) FROM tst1_n2 a JOIN tst1_n2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tst1_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT sum(a.cnt) FROM tst1_n2 a JOIN tst1_n2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tst1_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on 
stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:a + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 309 Data size: 28119 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 309 Data size: 28119 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), cnt (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 309 Data size: 28119 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 309 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + 
Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(a.cnt) FROM tst1_n2 a JOIN tst1_n2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tst1_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(a.cnt) FROM tst1_n2 a JOIN tst1_n2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tst1_n2 +#### A masked pattern was here #### +500 diff --git a/ql/src/test/results/clientpositive/llap/auto_join25.q.out b/ql/src/test/results/clientpositive/auto_join25.q.out similarity index 81% rename from ql/src/test/results/clientpositive/llap/auto_join25.q.out rename to ql/src/test/results/clientpositive/auto_join25.q.out index 73a8b86bbc..11288ff31f 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join25.q.out +++ b/ql/src/test/results/clientpositive/auto_join25.q.out @@ -18,6 +18,9 @@ PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1_n62 +Hive Runtime Error: Map local work exhausted memory +FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1_n62 
SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') @@ -31,10 +34,12 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest1_n62 POSTHOOK: Lineage: dest1_n62.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1_n62.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL RUN: Stage-1:MAPRED -RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE -RUN: Stage-3:STATS +RUN: Stage-3:MAPRED +RUN: Stage-2:STATS PREHOOK: query: SELECT sum(hash(dest1_n62.key,dest1_n62.value)) FROM dest1_n62 PREHOOK: type: QUERY PREHOOK: Input: default@dest1_n62 @@ -59,6 +64,12 @@ INSERT OVERWRITE TABLE dest_j2_n0 SELECT src1.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest_j2_n0 +Hive Runtime Error: Map local work exhausted memory +FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask +Hive Runtime Error: Map local work exhausted memory +FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2_n0 SELECT src1.key, src3.value POSTHOOK: type: QUERY @@ -66,9 +77,14 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j2_n0 POSTHOOK: Lineage: dest_j2_n0.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2_n0.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-12:CONDITIONAL +RUN: Stage-15:MAPREDLOCAL RUN: Stage-1:MAPRED -RUN: 
Stage-2:DEPENDENCY_COLLECTION +RUN: Stage-9:CONDITIONAL +RUN: Stage-13:MAPREDLOCAL +RUN: Stage-2:MAPRED RUN: Stage-0:MOVE +RUN: Stage-4:MAPRED RUN: Stage-3:STATS PREHOOK: query: SELECT sum(hash(dest_j2_n0.key,dest_j2_n0.value)) FROM dest_j2_n0 PREHOOK: type: QUERY @@ -94,6 +110,9 @@ INSERT OVERWRITE TABLE dest_j1_n5 SELECT src1.key, src2.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest_j1_n5 +Hive Runtime Error: Map local work exhausted memory +FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1_n5 SELECT src1.key, src2.value POSTHOOK: type: QUERY @@ -101,10 +120,12 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j1_n5 POSTHOOK: Lineage: dest_j1_n5.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1_n5.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL RUN: Stage-1:MAPRED -RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE -RUN: Stage-3:STATS +RUN: Stage-3:MAPRED +RUN: Stage-2:STATS PREHOOK: query: SELECT sum(hash(dest_j1_n5.key,dest_j1_n5.value)) FROM dest_j1_n5 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1_n5 diff --git a/ql/src/test/results/clientpositive/auto_join26.q.out b/ql/src/test/results/clientpositive/auto_join26.q.out new file mode 100644 index 0000000000..dcd65a0e72 --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join26.q.out @@ -0,0 +1,208 @@ +PREHOOK: query: CREATE TABLE dest_j1(key INT, cnt INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: CREATE TABLE dest_j1(key INT, cnt INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@dest_j1 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest_j1 +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:x + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:x + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE 
+ Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, count(1) FROM 
src1 x JOIN src y ON (x.key = y.key) group by x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ] +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +128 3 +146 2 +150 1 +213 2 +224 2 +238 2 +255 2 +273 3 +278 2 +311 3 +369 3 +401 5 +406 4 +66 1 +98 2 diff --git a/ql/src/test/results/clientpositive/auto_join27.q.out b/ql/src/test/results/clientpositive/auto_join27.q.out new file mode 100644 index 0000000000..52d41bb6fb --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join27.q.out @@ -0,0 +1,217 @@ +PREHOOK: query: explain +SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-7 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 200.0D) 
(type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 200.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:src + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 200.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 200.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: 
COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 200.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 200.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 249 Data size: 21663 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 249 Data size: 1992 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TableScan + Union + Statistics: Num rows: 249 Data size: 21663 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 249 Data size: 1992 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: 
_col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +548 diff --git 
a/ql/src/test/results/clientpositive/auto_join28.q.out b/ql/src/test/results/clientpositive/auto_join28.q.out new file mode 100644 index 0000000000..e64539efd4 --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join28.q.out @@ -0,0 +1,646 @@ +PREHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-10 is a root stage + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:$hdt$_1:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:$hdt$_1:src1 + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 
_col0 (type: string) + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 
(type: string), _col3 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {VALUE._col1} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, 
src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:src2 + Fetch Operator + limit: -1 + $hdt$_2:src3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:src2 + TableScan + alias: src2 + filterExpr: (UDFToDouble(key) > 10.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) > 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 {(UDFToDouble(_col0) < 10.0D)} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_2:src3 + TableScan + alias: src3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: 
string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {(UDFToDouble(_col0) < 10.0D)} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT 
* FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src1 + Fetch Operator + limit: -1 + $hdt$_2:src3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src1 + TableScan + alias: src1 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_2:src3 + TableScan + alias: src3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + 
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(UDFToDouble(_col0) > 10.0D)} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-10 is a root stage + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:src1 + TableScan + alias: src1 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator 
+ predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) > 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-7 + Conditional 
Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {_col2} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), 
KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {VALUE._col1} + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/auto_join3.q.out b/ql/src/test/results/clientpositive/auto_join3.q.out new file mode 100644 index 0000000000..7029bc4178 --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join3.q.out @@ -0,0 +1,195 @@ +PREHOOK: query: CREATE TABLE dest1_n140(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n140 +POSTHOOK: query: CREATE TABLE dest1_n140(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n140 +PREHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) +INSERT OVERWRITE TABLE dest1_n140 SELECT src1.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n140 +POSTHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) +INSERT OVERWRITE TABLE dest1_n140 SELECT src1.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n140 +STAGE DEPENDENCIES: + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src1 + Fetch Operator + limit: -1 + $hdt$_2:src3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src1 + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 
Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_2:src3 + TableScan + alias: src3 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num 
rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n140 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n140 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1_n140 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 
864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) +INSERT OVERWRITE TABLE dest1_n140 SELECT src1.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n140 +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) +INSERT OVERWRITE TABLE dest1_n140 SELECT src1.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n140 +POSTHOOK: Lineage: dest1_n140.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n140.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1_n140.key,dest1_n140.value)) FROM dest1_n140 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n140 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1_n140.key,dest1_n140.value)) FROM dest1_n140 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n140 +#### A masked pattern was here #### +344360994461 diff --git a/ql/src/test/results/clientpositive/auto_join31.q.out b/ql/src/test/results/clientpositive/auto_join31.q.out new file mode 
100644 index 0000000000..0c0e474220 --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join31.q.out @@ -0,0 +1,168 @@ +PREHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:src + Fetch Operator + limit: -1 + $hdt$_0:$hdt$_2:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_0:src + TableScan + alias: src + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_0:$hdt$_2:src + TableScan + alias: src + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: 
Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work 
+ Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +348019368476 diff --git a/ql/src/test/results/clientpositive/auto_join32.q.out b/ql/src/test/results/clientpositive/auto_join32.q.out new file mode 100644 index 0000000000..f215592bac --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join32.q.out @@ -0,0 +1,555 @@ +PREHOOK: query: create table studenttab10k (name string, age int, gpa double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studenttab10k +POSTHOOK: query: create table studenttab10k (name string, age int, gpa double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studenttab10k +PREHOOK: query: create table votertab10k (name string, age int, 
registration string, contributions float) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@votertab10k +POSTHOOK: query: create table votertab10k (name string, age int, registration string, contributions float) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@votertab10k +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k +PREHOOK: Input: default@votertab10k +#### A masked pattern was here #### +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k +POSTHOOK: Input: default@votertab10k +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:s + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:s + TableScan + alias: s + filterExpr: name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: v + filterExpr: name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Filter Operator + 
predicate: name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string), registration (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col2) + keys: _col0 (type: string), _col2 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY 
+PREHOOK: Input: default@studenttab10k +PREHOOK: Input: default@votertab10k +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k +POSTHOOK: Input: default@votertab10k +#### A masked pattern was here #### +PREHOOK: query: create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Input: default@votertab10k_smb +#### A 
masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: s + filterExpr: name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: count(DISTINCT _col2) + keys: _col0 (type: string), _col2 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +POSTHOOK: 
query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +PREHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table studenttab10k_smb +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table studenttab10k_smb +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table studenttab10k_smb +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table studenttab10k_smb +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table votertab10k_smb +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table votertab10k_smb +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table votertab10k_smb +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table votertab10k_smb +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: explain select s.name, count(distinct registration) +from 
studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: s + filterExpr: name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: count(DISTINCT _col2) + keys: _col0 (type: string), _col2 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +PREHOOK: query: create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studenttab10k_part +POSTHOOK: query: create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studenttab10k_part +PREHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@votertab10k_part +POSTHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@votertab10k_part +PREHOOK: query: load data local inpath '../../data/files/empty/000000_0' 
into table studenttab10k_part partition (p='foo') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@studenttab10k_part +POSTHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table studenttab10k_part partition (p='foo') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@studenttab10k_part +POSTHOOK: Output: default@studenttab10k_part@p=foo +PREHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table studenttab10k_part partition (p='foo') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@studenttab10k_part@p=foo +POSTHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table studenttab10k_part partition (p='foo') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@studenttab10k_part@p=foo +PREHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table votertab10k_part partition (p='foo') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@votertab10k_part +POSTHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table votertab10k_part partition (p='foo') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@votertab10k_part +POSTHOOK: Output: default@votertab10k_part@p=foo +PREHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table votertab10k_part partition (p='foo') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@votertab10k_part@p=foo +POSTHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table votertab10k_part partition (p='foo') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@votertab10k_part@p=foo +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = 
v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_part +PREHOOK: Input: default@votertab10k_part +#### A masked pattern was here #### +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_part +POSTHOOK: Input: default@votertab10k_part +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: s + filterExpr: ((p = 'bar') and name is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((p = 'bar') and name is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: count(DISTINCT _col2) + keys: _col0 (type: string), _col2 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_part +PREHOOK: Input: default@votertab10k_part +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_part +POSTHOOK: Input: default@votertab10k_part +#### A masked pattern was here #### +PREHOOK: query: drop table studenttab10k +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@studenttab10k +PREHOOK: Output: default@studenttab10k +POSTHOOK: query: drop table studenttab10k +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@studenttab10k +POSTHOOK: Output: default@studenttab10k +PREHOOK: query: drop table votertab10k +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@votertab10k +PREHOOK: Output: default@votertab10k +POSTHOOK: query: drop table votertab10k +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@votertab10k +POSTHOOK: Output: default@votertab10k +PREHOOK: query: drop table studenttab10k_smb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: drop table studenttab10k_smb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: drop table votertab10k_smb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@votertab10k_smb +PREHOOK: Output: default@votertab10k_smb 
+POSTHOOK: query: drop table votertab10k_smb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@votertab10k_smb +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: drop table studenttab10k_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@studenttab10k_part +PREHOOK: Output: default@studenttab10k_part +POSTHOOK: query: drop table studenttab10k_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@studenttab10k_part +POSTHOOK: Output: default@studenttab10k_part +PREHOOK: query: drop table votertab10k_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@votertab10k_part +PREHOOK: Output: default@votertab10k_part +POSTHOOK: query: drop table votertab10k_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@votertab10k_part +POSTHOOK: Output: default@votertab10k_part diff --git a/ql/src/test/results/clientpositive/auto_join33.q.out b/ql/src/test/results/clientpositive/auto_join33.q.out new file mode 100644 index 0000000000..fb6ddab51e --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join33.q.out @@ -0,0 +1,106 @@ +PREHOOK: query: explain +SELECT * FROM + (SELECT * FROM src WHERE key+1 < 10) a + JOIN + (SELECT * FROM src WHERE key+2 < 10) b + ON a.key+1=b.key+2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM + (SELECT * FROM src WHERE key+1 < 10) a + JOIN + (SELECT * FROM src WHERE key+2 < 10) b + ON a.key+1=b.key+2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src + TableScan + alias: src + filterExpr: (((UDFToDouble(key) + 1.0D) < 10.0D) and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 
Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((UDFToDouble(key) + 1.0D) < 10.0D) and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 (UDFToDouble(_col0) + 1.0D) (type: double) + 1 (UDFToDouble(_col0) + 2.0D) (type: double) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (((UDFToDouble(key) + 2.0D) < 10.0D) and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((UDFToDouble(key) + 2.0D) < 10.0D) and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (UDFToDouble(_col0) + 1.0D) (type: double) + 1 (UDFToDouble(_col0) + 2.0D) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM + (SELECT * FROM src WHERE key+1 < 10) a + JOIN + (SELECT * FROM src WHERE key+2 < 10) b + ON a.key+1=b.key+2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM + (SELECT * FROM src WHERE key+1 < 10) a + JOIN + (SELECT * FROM src WHERE key+2 < 10) b + ON a.key+1=b.key+2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +5 val_5 4 val_4 +5 val_5 4 val_4 +5 val_5 4 val_4 diff --git a/ql/src/test/results/clientpositive/auto_join4.q.out b/ql/src/test/results/clientpositive/auto_join4.q.out new file mode 100644 index 0000000000..675dac765a --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join4.q.out @@ -0,0 +1,214 @@ +PREHOOK: query: CREATE TABLE dest1_n115(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n115 +POSTHOOK: query: CREATE TABLE dest1_n115(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n115 +PREHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n115 SELECT c.c1, c.c2, c.c3, c.c4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n115 +POSTHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON 
(a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n115 SELECT c.c1, c.c2, c.c3, c.c4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n115 +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:src2 + TableScan + alias: src2 + filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer 
Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n115 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n115 + + Stage: 
Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1_n115 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n115 SELECT c.c1, c.c2, c.c3, c.c4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n115 +POSTHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c 
+INSERT OVERWRITE TABLE dest1_n115 SELECT c.c1, c.c2, c.c3, c.c4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n115 +POSTHOOK: Lineage: dest1_n115.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n115.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n115.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n115.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1_n115.c1,dest1_n115.c2,dest1_n115.c3,dest1_n115.c4)) FROM dest1_n115 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n115 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1_n115.c1,dest1_n115.c2,dest1_n115.c3,dest1_n115.c4)) FROM dest1_n115 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n115 +#### A masked pattern was here #### +5079148035 diff --git a/ql/src/test/results/clientpositive/auto_join5.q.out b/ql/src/test/results/clientpositive/auto_join5.q.out new file mode 100644 index 0000000000..6da8d4aab4 --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join5.q.out @@ -0,0 +1,214 @@ +PREHOOK: query: CREATE TABLE dest1_n64(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n64 +POSTHOOK: query: CREATE TABLE dest1_n64(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n64 +PREHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + RIGHT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS 
c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n64 SELECT c.c1, c.c2, c.c3, c.c4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n64 +POSTHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + RIGHT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n64 SELECT c.c1, c.c2, c.c3, c.c4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n64 +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src1 + TableScan + alias: src1 + filterExpr: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + 
Filter Operator + predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n64 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n64 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1_n64 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + RIGHT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE 
dest1_n64 SELECT c.c1, c.c2, c.c3, c.c4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n64 +POSTHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + RIGHT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n64 SELECT c.c1, c.c2, c.c3, c.c4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n64 +POSTHOOK: Lineage: dest1_n64.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n64.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n64.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n64.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1_n64.c1,dest1_n64.c2,dest1_n64.c3,dest1_n64.c4)) FROM dest1_n64 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n64 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1_n64.c1,dest1_n64.c2,dest1_n64.c3,dest1_n64.c4)) FROM dest1_n64 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n64 +#### A masked pattern was here #### +9766083196 diff --git a/ql/src/test/results/clientpositive/auto_join6.q.out b/ql/src/test/results/clientpositive/auto_join6.q.out new file mode 100644 index 0000000000..8996b2d6c5 --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join6.q.out @@ -0,0 +1,213 @@ +PREHOOK: query: CREATE TABLE dest1_n9(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n9 +POSTHOOK: query: CREATE TABLE dest1_n9(c1 INT, c2 STRING, c3 INT, c4 STRING) 
STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n9 +PREHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n9 SELECT c.c1, c.c2, c.c3, c.c4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n9 +POSTHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n9 SELECT c.c1, c.c2, c.c3, c.c4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n9 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 
(type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: src2 + filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 39160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 20900 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 110 Data size: 20900 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n9 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 110 Data size: 20900 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n9 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1_n9 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE 
Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n9 SELECT c.c1, c.c2, c.c3, c.c4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n9 +POSTHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n9 SELECT c.c1, c.c2, c.c3, c.c4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n9 +POSTHOOK: Lineage: dest1_n9.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n9.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n9.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n9.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1_n9.c1,dest1_n9.c2,dest1_n9.c3,dest1_n9.c4)) FROM dest1_n9 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n9 +#### A masked pattern was here #### +POSTHOOK: query: SELECT 
sum(hash(dest1_n9.c1,dest1_n9.c2,dest1_n9.c3,dest1_n9.c4)) FROM dest1_n9 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n9 +#### A masked pattern was here #### +2607643291 diff --git a/ql/src/test/results/clientpositive/auto_join7.q.out b/ql/src/test/results/clientpositive/auto_join7.q.out new file mode 100644 index 0000000000..83a46056b2 --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join7.q.out @@ -0,0 +1,282 @@ +PREHOOK: query: CREATE TABLE dest1_n147(c1 INT, c2 STRING, c3 INT, c4 STRING, c5 INT, c6 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n147 +POSTHOOK: query: CREATE TABLE dest1_n147(c1 INT, c2 STRING, c3 INT, c4 STRING, c5 INT, c6 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n147 +PREHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + FROM src src3 SELECT src3.key AS c5, src3.value AS c6 WHERE src3.key > 20 and src3.key < 25 + ) c + ON (a.c1 = c.c5) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 +) c +INSERT OVERWRITE TABLE dest1_n147 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n147 +POSTHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + FROM src src3 SELECT src3.key AS c5, src3.value AS c6 WHERE src3.key > 20 and src3.key < 25 + ) c + ON (a.c1 = c.c5) + SELECT a.c1 AS 
c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 +) c +INSERT OVERWRITE TABLE dest1_n147 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n147 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-8 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: src2 + filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE 
Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 39160 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_2:src3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_2:src3 + TableScan + alias: src3 + filterExpr: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 110 Data size: 58740 Basic stats: 
COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col4) (type: int), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n147 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n147 
+ + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1_n147 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + FROM src src3 SELECT src3.key AS c5, src3.value AS c6 WHERE src3.key > 20 and src3.key < 25 + ) c + ON (a.c1 = c.c5) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 +) c +INSERT OVERWRITE TABLE dest1_n147 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n147 +POSTHOOK: 
query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + FROM src src3 SELECT src3.key AS c5, src3.value AS c6 WHERE src3.key > 20 and src3.key < 25 + ) c + ON (a.c1 = c.c5) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 +) c +INSERT OVERWRITE TABLE dest1_n147 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n147 +POSTHOOK: Lineage: dest1_n147.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n147.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n147.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n147.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n147.c5 EXPRESSION [(src)src3.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n147.c6 SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1_n147.c1,dest1_n147.c2,dest1_n147.c3,dest1_n147.c4,dest1_n147.c5,dest1_n147.c6)) FROM dest1_n147 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n147 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1_n147.c1,dest1_n147.c2,dest1_n147.c3,dest1_n147.c4,dest1_n147.c5,dest1_n147.c6)) FROM dest1_n147 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n147 +#### A masked pattern was here #### +-2315698213 diff --git a/ql/src/test/results/clientpositive/auto_join8.q.out b/ql/src/test/results/clientpositive/auto_join8.q.out new file mode 100644 index 0000000000..5879fbc74b --- /dev/null +++ 
b/ql/src/test/results/clientpositive/auto_join8.q.out @@ -0,0 +1,217 @@ +PREHOOK: query: CREATE TABLE dest1_n3(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n3 +POSTHOOK: query: CREATE TABLE dest1_n3(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n3 +PREHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n3 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n3 +POSTHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n3 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n3 +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:src2 + TableScan + alias: src2 + filterExpr: ((UDFToDouble(key) > 15.0D) and 
(UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col2 is null (type: boolean) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), null (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data 
size: 190 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n3 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1_n3 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n3 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n3 +POSTHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1_n3 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n3 +POSTHOOK: Lineage: dest1_n3.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n3.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n3.c3 EXPRESSION [] +POSTHOOK: Lineage: dest1_n3.c4 SIMPLE [(src)src2.FieldSchema(name:value, 
type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1_n3.c1,dest1_n3.c2,dest1_n3.c3,dest1_n3.c4)) FROM dest1_n3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1_n3.c1,dest1_n3.c2,dest1_n3.c3,dest1_n3.c4)) FROM dest1_n3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n3 +#### A masked pattern was here #### +-7158439905 diff --git a/ql/src/test/results/clientpositive/auto_join9.q.out b/ql/src/test/results/clientpositive/auto_join9.q.out new file mode 100644 index 0000000000..41a7bc23dd --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join9.q.out @@ -0,0 +1,176 @@ +PREHOOK: query: CREATE TABLE dest1_n142(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n142 +POSTHOOK: query: CREATE TABLE dest1_n142(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n142 +PREHOOK: query: explain +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n142 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1_n142 +POSTHOOK: query: explain +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n142 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1_n142 +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: 
Stage-5 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src1 + TableScan + alias: src1 + filterExpr: ((ds = '2008-04-08') and (hr = '12') and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n142 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n142 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1_n142 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n142 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1_n142 +POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n142 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1_n142 +POSTHOOK: Lineage: dest1_n142.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n142.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1_n142.key,dest1_n142.value)) FROM dest1_n142 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n142 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1_n142.key,dest1_n142.value)) FROM dest1_n142 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n142 +#### A masked pattern was here #### +101861029915 diff --git a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out new file mode 100644 index 0000000000..0378c65eae --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out @@ -0,0 +1,704 @@ +PREHOOK: query: create table testsrc ( `key` int,`val` string) 
+PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testsrc +POSTHOOK: query: create table testsrc ( `key` int,`val` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testsrc +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table testsrc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@testsrc +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table testsrc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@testsrc +PREHOOK: query: drop table if exists orderpayment_small +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists orderpayment_small +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table orderpayment_small (`dealid` int,`date` string,`time` string, `cityid` int, `userid` int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orderpayment_small +POSTHOOK: query: create table orderpayment_small (`dealid` int,`date` string,`time` string, `cityid` int, `userid` int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orderpayment_small +PREHOOK: query: insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55 ,5372613 from testsrc tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@testsrc +PREHOOK: Output: default@orderpayment_small +POSTHOOK: query: insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55 ,5372613 from testsrc tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testsrc +POSTHOOK: Output: default@orderpayment_small +POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE [] +POSTHOOK: Lineage: orderpayment_small.date SIMPLE [] +POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE [] +POSTHOOK: Lineage: orderpayment_small.time SIMPLE [] +POSTHOOK: 
Lineage: orderpayment_small.userid SIMPLE [] +PREHOOK: query: drop table if exists user_small +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists user_small +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table user_small( userid int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@user_small +POSTHOOK: query: create table user_small( userid int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@user_small +PREHOOK: query: insert overwrite table user_small select key from testsrc tablesample (100 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@testsrc +PREHOOK: Output: default@user_small +POSTHOOK: query: insert overwrite table user_small select key from testsrc tablesample (100 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testsrc +POSTHOOK: Output: default@user_small +POSTHOOK: Lineage: user_small.userid SIMPLE [(testsrc)testsrc.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: explain extended SELECT + `dim_pay_date`.`date` + , `deal`.`dealid` +FROM `orderpayment_small` `orderpayment` +JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date` +JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid` +JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid` +JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orderpayment_small +PREHOOK: Input: default@user_small +#### A masked pattern was here #### +POSTHOOK: query: explain extended SELECT + `dim_pay_date`.`date` + , `deal`.`dealid` +FROM `orderpayment_small` `orderpayment` +JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date` +JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid` +JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = 
`orderpayment`.`cityid` +JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orderpayment_small +POSTHOOK: Input: default@user_small +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT `t4`.`date`, `t6`.`dealid` +FROM (SELECT `userid` +FROM `default`.`user_small` +WHERE `userid` IS NOT NULL) AS `t0` +INNER JOIN ((SELECT `dealid`, `date`, `cityid`, `userid` +FROM `default`.`orderpayment_small` +WHERE `date` IS NOT NULL AND `dealid` IS NOT NULL AND `cityid` IS NOT NULL AND `userid` IS NOT NULL) AS `t2` +INNER JOIN (SELECT `date` +FROM `default`.`orderpayment_small` +WHERE `date` IS NOT NULL) AS `t4` ON `t2`.`date` = `t4`.`date` +INNER JOIN (SELECT `dealid` +FROM `default`.`orderpayment_small` +WHERE `dealid` IS NOT NULL) AS `t6` ON `t2`.`dealid` = `t6`.`dealid` +INNER JOIN (SELECT `cityid` +FROM `default`.`orderpayment_small` +WHERE `cityid` IS NOT NULL) AS `t8` ON `t2`.`cityid` = `t8`.`cityid`) ON `t0`.`userid` = `t2`.`userid` +LIMIT 5 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orderpayment + filterExpr: (date is not null and dealid is not null and cityid is not null and userid is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (date is not null and dealid is not null and cityid is not null and userid is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: dealid (type: int), date (type: string), cityid (type: int), userid (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 
106 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + auto parallelism: false + TableScan + alias: dim_pay_date + filterExpr: date is not null (type: boolean) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: date is not null (type: boolean) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: date (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: orderpayment_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows 1 + rawDataSize 36 + serialization.ddl struct orderpayment_small { i32 dealid, string 
date, string time, i32 cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows 1 + rawDataSize 36 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orderpayment_small + name: default.orderpayment_small + Truncated Path -> Alias: + /orderpayment_small [$hdt$_1:orderpayment, $hdt$_2:dim_pay_date] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col2,_col3,_col4 + columns.types int,int,int,string + escape.delim \ + 
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: string) + auto parallelism: false + TableScan + alias: deal + filterExpr: dealid is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: dealid is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: dealid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col2,_col3,_col4 + columns.types int,int,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col2,_col3,_col4 + columns.types int,int,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +#### A masked pattern was here #### + Partition + base file name: orderpayment_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows 1 + rawDataSize 36 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows 1 + rawDataSize 36 + serialization.ddl struct orderpayment_small { i32 
dealid, string date, string time, i32 cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orderpayment_small + name: default.orderpayment_small + Truncated Path -> Alias: + /orderpayment_small [$hdt$_3:deal] +#### A masked pattern was here #### + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col2,_col3,_col4,_col5 + columns.types int,int,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col3 (type: int), _col4 (type: string), _col5 (type: int) + auto parallelism: false + TableScan + alias: order_city + filterExpr: cityid is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + 
predicate: cityid is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cityid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col2,_col3,_col4,_col5 + columns.types int,int,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col2,_col3,_col4,_col5 + columns.types int,int,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +#### A masked pattern was here #### + Partition + base file name: orderpayment_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns 
dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows 1 + rawDataSize 36 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows 1 + rawDataSize 36 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orderpayment_small + name: default.orderpayment_small + Truncated Path -> Alias: + /orderpayment_small [$hdt$_4:order_city] +#### A masked pattern was here #### + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: 
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col3,_col4,_col5 + columns.types int,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col4 (type: string), _col5 (type: int) + auto parallelism: false + TableScan + alias: user + filterExpr: userid is not null (type: boolean) + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: userid is not null (type: boolean) + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: userid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10006 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + 
column.name.delimiter , + columns _col3,_col4,_col5 + columns.types int,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col3,_col4,_col5 + columns.types int,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +#### A masked pattern was here #### + Partition + base file name: user_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns userid + columns.comments + columns.types int +#### A masked pattern was here #### + name default.user_small + numFiles 1 + numRows 100 + rawDataSize 288 + serialization.ddl struct user_small { i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 388 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns userid + columns.comments + columns.types int +#### A masked pattern was here #### + name default.user_small + numFiles 1 + numRows 100 + rawDataSize 288 + serialization.ddl struct user_small { i32 userid} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 388 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.user_small + name: default.user_small + Truncated Path -> Alias: + /user_small [$hdt$_0:user] +#### A masked pattern was here #### + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5 + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col4 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + `dim_pay_date`.`date` + , `deal`.`dealid` +FROM `orderpayment_small` `orderpayment` +JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date` +JOIN `orderpayment_small` `deal` ON 
`deal`.`dealid` = `orderpayment`.`dealid` +JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid` +JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orderpayment_small +PREHOOK: Input: default@user_small +#### A masked pattern was here #### +POSTHOOK: query: SELECT + `dim_pay_date`.`date` + , `deal`.`dealid` +FROM `orderpayment_small` `orderpayment` +JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date` +JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid` +JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid` +JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orderpayment_small +POSTHOOK: Input: default@user_small +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/auto_join_stats.q.out b/ql/src/test/results/clientpositive/auto_join_stats.q.out new file mode 100644 index 0000000000..d2ca52f75b --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -0,0 +1,603 @@ +PREHOOK: query: create table smalltable_n0(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable_n0 +POSTHOOK: query: create table smalltable_n0(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable_n0 +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable_n0 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable_n0 +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable_n0 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable_n0 +PREHOOK: 
query: analyze table smalltable_n0 compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable_n0 +PREHOOK: Output: default@smalltable_n0 +POSTHOOK: query: analyze table smalltable_n0 compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable_n0 +POSTHOOK: Output: default@smalltable_n0 +PREHOOK: query: explain select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable_n0 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable_n0 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-8 is a root stage , consists of Stage-10, Stage-11, Stage-1 + Stage-10 has a backup stage: Stage-1 + Stage-6 depends on stages: Stage-10 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-9 + Stage-11 has a backup stage: Stage-1 + Stage-7 depends on stages: Stage-11 + Stage-1 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-8 + Conditional Operator + + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:src2 + TableScan + alias: src2 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), 
UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_2:smalltable_n0 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_2:smalltable_n0 + TableScan + alias: smalltable_n0 + filterExpr: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 (_col1 + _col3) (type: double) + 1 _col1 (type: double) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + _col3) (type: double) + 1 _col1 (type: double) + outputColumnNames: _col0, _col2, _col4 + Statistics: Num rows: 870 Data size: 165319 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 870 Data size: 165319 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 870 Data size: 165319 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-11 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src1 + TableScan + alias: src1 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE 
+ HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + TableScan + alias: src2 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable_n0 +PREHOOK: 
Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable_n0 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +PREHOOK: query: create table smalltable2_n0(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable2_n0 +POSTHOOK: query: create table smalltable2_n0(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable2_n0 +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2_n0 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable2_n0 +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2_n0 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable2_n0 +PREHOOK: query: analyze table smalltable_n0 compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable_n0 +PREHOOK: Output: default@smalltable_n0 +POSTHOOK: query: analyze table smalltable_n0 compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable_n0 +POSTHOOK: Output: default@smalltable_n0 +PREHOOK: query: explain select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) JOIN smalltable2_n0 ON (src1.key + src2.key = smalltable2_n0.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable2_n0 +PREHOOK: Input: default@smalltable_n0 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select src1.key, src2.key, smalltable_n0.key from src src1 
JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) JOIN smalltable2_n0 ON (src1.key + src2.key = smalltable2_n0.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable2_n0 +POSTHOOK: Input: default@smalltable_n0 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-11 is a root stage , consists of Stage-13, Stage-14, Stage-1 + Stage-13 has a backup stage: Stage-1 + Stage-9 depends on stages: Stage-13 + Stage-12 depends on stages: Stage-1, Stage-9, Stage-10 + Stage-7 depends on stages: Stage-12 + Stage-14 has a backup stage: Stage-1 + Stage-10 depends on stages: Stage-14 + Stage-1 + Stage-0 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-11 + Conditional Operator + + Stage: Stage-13 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_2:src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_2:src2 + TableScan + alias: src2 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num 
rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:smalltable_n0 + Fetch Operator + limit: -1 + $hdt$_3:smalltable2_n0 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:smalltable_n0 + TableScan + alias: smalltable_n0 + filterExpr: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 (_col1 + _col3) (type: double) + 1 _col1 (type: double) + $hdt$_3:smalltable2_n0 + TableScan + alias: smalltable2_n0 + filterExpr: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null 
(type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToDouble(key) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 (_col1 + _col3) (type: double) + 1 _col0 (type: double) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + _col3) (type: double) + 1 _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 870 Data size: 165319 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + _col3) (type: double) + 1 _col1 (type: double) + outputColumnNames: _col0, _col2, _col5 + Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:src1 + TableScan + alias: src1 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and 
UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: 
boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + TableScan + alias: src2 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) JOIN smalltable2_n0 ON (src1.key + src2.key = smalltable2_n0.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable2_n0 +PREHOOK: Input: default@smalltable_n0 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) JOIN smalltable2_n0 ON (src1.key + src2.key = smalltable2_n0.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable2_n0 +POSTHOOK: Input: default@smalltable_n0 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +4 4 8 +4 4 8 diff --git a/ql/src/test/results/clientpositive/auto_join_stats2.q.out b/ql/src/test/results/clientpositive/auto_join_stats2.q.out new file mode 100644 index 0000000000..235a2bacc5 --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join_stats2.q.out @@ -0,0 +1,322 @@ +PREHOOK: query: create table smalltable(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable +POSTHOOK: query: create table smalltable(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@smalltable +Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:smalltable + Fetch Operator + limit: -1 + $hdt$_2:src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:smalltable + TableScan + alias: smalltable + filterExpr: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + $hdt$_2:src1 + TableScan + alias: src1 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 
43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 154000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 + _col1) = _col3) (type: boolean) + Statistics: Num rows: 275 Data size: 77000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 77000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 77000 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +PREHOOK: query: create table smalltable2(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: analyze table smalltable compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Output: default@smalltable +POSTHOOK: query: analyze table smalltable compute 
statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@smalltable2 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@smalltable2 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-10 is a root stage + Stage-7 depends on stages: Stage-10 + Stage-0 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:smalltable + Fetch Operator + limit: -1 + $hdt$_1:src1 + Fetch Operator + limit: -1 + $hdt$_3:smalltable2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:smalltable + TableScan + alias: smalltable + filterExpr: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 (_col1 + _col3) (type: double) + 1 _col1 (type: double) + $hdt$_1:src1 + TableScan + alias: src1 + 
filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + $hdt$_3:smalltable2 + TableScan + alias: smalltable2 + filterExpr: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToDouble(key) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 (_col1 + _col3) (type: double) + 1 _col0 (type: double) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 
(type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + _col3) (type: double) + 1 _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 870 Data size: 165319 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + _col3) (type: double) + 1 _col1 (type: double) + outputColumnNames: _col0, _col2, _col5 + Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@smalltable2 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + 
src2.key = smalltable2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@smalltable2 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +4 4 8 +4 4 8 diff --git a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out new file mode 100644 index 0000000000..d64581c66c --- /dev/null +++ b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out @@ -0,0 +1,1153 @@ +PREHOOK: query: explain +select a.* from src a join src b on a.key=b.key order by key, value limit 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select a.* from src a join src b on a.key=b.key order by key, value limit 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 + Stage-7 has a backup stage: Stage-1 + Stage-4 depends on stages: Stage-7 + Stage-2 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-6 + Conditional Operator + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:b + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 
(type: string) + 1 _col0 (type: string) + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 40 + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:a + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce 
Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 40 + Processor Tree: + ListSink + +PREHOOK: query: select a.* from src a join src b on a.key=b.key order by key, value limit 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.* from src a join src b on a.key=b.key order by key, value limit 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +RUN: Stage-6:CONDITIONAL +RUN: Stage-7:MAPREDLOCAL +RUN: Stage-4:MAPRED +RUN: Stage-2:MAPRED +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +PREHOOK: query: explain +select a.* from src a join src b on a.key=b.key join src c on a.value=c.value order by a.key, a.value limit 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select a.* from src a join src b on a.key=b.key join src c on a.value=c.value order by a.key, a.value limit 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-11 is a root stage , consists of Stage-14, Stage-15, Stage-1 + Stage-14 has a backup stage: Stage-1 + Stage-9 depends on stages: Stage-14 + Stage-8 depends on stages: Stage-1, Stage-9, Stage-10 , consists of Stage-12, Stage-7, Stage-2 + Stage-12 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-12 + Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 + 
Stage-7 has a backup stage: Stage-2 + Stage-2 + Stage-15 has a backup stage: Stage-1 + Stage-10 depends on stages: Stage-15 + Stage-1 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-11 + Conditional Operator + + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:b + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Conditional Operator + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_2:c + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_2:c + TableScan + alias: c + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1288 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 1288 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1288 Data size: 229264 
Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 40 + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: c + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1288 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE 
Column stats: COMPLETE + value expressions: _col0 (type: string) + TableScan + alias: c + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1288 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:a + TableScan + alias: a + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 
89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 40 + Processor Tree: + ListSink + +PREHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a.value=c.value order by a.key, a.value limit 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a.value=c.value order by a.key, a.value limit 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +RUN: Stage-11:CONDITIONAL +RUN: Stage-14:MAPREDLOCAL +RUN: Stage-9:MAPRED +RUN: Stage-8:CONDITIONAL +RUN: Stage-12:MAPREDLOCAL +RUN: Stage-6:MAPRED 
+RUN: Stage-3:MAPRED +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +PREHOOK: query: explain +select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-11 is a root stage , consists of Stage-14, Stage-15, Stage-1 + Stage-14 has a backup stage: Stage-1 + Stage-9 depends on stages: Stage-14 + Stage-8 depends on stages: Stage-1, Stage-9, Stage-10 , consists of Stage-12, Stage-7, Stage-2 + Stage-12 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-12 + Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-7 has a backup stage: Stage-2 + Stage-2 + Stage-15 has a backup stage: Stage-1 + Stage-10 depends on stages: Stage-15 + Stage-1 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-11 + Conditional Operator + + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_2:b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_2:b + TableScan + alias: b + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 
Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Conditional Operator + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:c + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:c + TableScan + alias: c + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select 
Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 40 + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: c + filterExpr: value is not null (type: boolean) + 
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + TableScan + alias: c + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: 
z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:a + TableScan + alias: a + filterExpr: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: b + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 
166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: b + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: 
COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 40 + Processor Tree: + ListSink + +PREHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +RUN: Stage-11:CONDITIONAL +RUN: Stage-14:MAPREDLOCAL +RUN: Stage-9:MAPRED +RUN: Stage-8:CONDITIONAL +RUN: Stage-12:MAPREDLOCAL +RUN: Stage-6:MAPRED +RUN: Stage-3:MAPRED +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +105 val_105 +111 val_111 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +119 val_119 +119 
val_119 +119 val_119 +119 val_119 +PREHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +Hive Runtime Error: Map local work exhausted memory +FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask +Hive Runtime Error: Map local work exhausted memory +FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask +POSTHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +RUN: Stage-11:CONDITIONAL +RUN: Stage-14:MAPREDLOCAL +RUN: Stage-1:MAPRED +RUN: Stage-8:CONDITIONAL +RUN: Stage-12:MAPREDLOCAL +RUN: Stage-2:MAPRED +RUN: Stage-3:MAPRED +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +105 val_105 +111 val_111 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_119 diff --git a/ql/src/test/results/clientpositive/llap/avro_date.q.out b/ql/src/test/results/clientpositive/avro_date.q.out similarity index 100% rename from ql/src/test/results/clientpositive/llap/avro_date.q.out rename to ql/src/test/results/clientpositive/avro_date.q.out index 2639a4bf75..ff969a1ffe 100644 --- 
a/ql/src/test/results/clientpositive/llap/avro_date.q.out +++ b/ql/src/test/results/clientpositive/avro_date.q.out @@ -93,8 +93,8 @@ POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26 1411-02-21 1 1947-02-11 1 2012-02-21 1 -8200-02-11 1 2014-02-11 1 +8200-02-11 1 PREHOOK: query: SELECT * FROM avro_date WHERE d!='1947-02-11' PREHOOK: type: QUERY PREHOOK: Input: default@avro_date diff --git a/ql/src/test/results/clientpositive/llap/avro_schema_evolution_native.q.out b/ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out similarity index 94% rename from ql/src/test/results/clientpositive/llap/avro_schema_evolution_native.q.out rename to ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out index 21040ecf32..cd401becad 100644 --- a/ql/src/test/results/clientpositive/llap/avro_schema_evolution_native.q.out +++ b/ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out @@ -259,9 +259,11 @@ STAGE PLANS: TableScan alias: episodes_partitioned_n0 filterExpr: (doctor_pt > 6) (type: boolean) + Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: title (type: string), air_date (type: string), doctor (type: int), value (type: int), doctor_pt (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: SELECT * FROM episodes_partitioned_n0 WHERE doctor_pt > 6 @@ -350,28 +352,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: episodes_partitioned_n0 - filterExpr: (doctor_pt > 6) (type: boolean) - Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: title (type: string), air_date (type: string), doctor (type: int), value (type: int), doctor_pt (type: int) - outputColumnNames: _col0, _col1, _col2, 
_col3, _col4 - Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs + Map Reduce + Map Operator Tree: + TableScan + alias: episodes_partitioned_n0 + filterExpr: (doctor_pt > 6) (type: boolean) + Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: title (type: string), air_date (type: string), doctor (type: int), value (type: int), doctor_pt (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/avro_timestamp.q.out b/ql/src/test/results/clientpositive/avro_timestamp.q.out similarity index 100% rename from ql/src/test/results/clientpositive/llap/avro_timestamp.q.out rename to ql/src/test/results/clientpositive/avro_timestamp.q.out index fffa6b68ed..0ac216a180 100644 --- a/ql/src/test/results/clientpositive/llap/avro_timestamp.q.out +++ b/ql/src/test/results/clientpositive/avro_timestamp.q.out @@ -88,12 +88,12 @@ POSTHOOK: Input: default@avro_timestamp POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 #### A masked pattern was 
here #### 0600-02-11 07:08:09.123 1 -1214-02-11 07:08:09.123 1 -1947-02-11 07:08:09.123 1 -2014-02-11 07:08:09.123 1 0847-02-11 07:08:09.123 1 +1214-02-11 07:08:09.123 1 1412-02-21 07:08:09.123 1 +1947-02-11 07:08:09.123 1 2012-02-21 07:08:09.123 1 +2014-02-11 07:08:09.123 1 8200-02-11 07:08:09.123 1 PREHOOK: query: SELECT * FROM avro_timestamp WHERE d!='1947-02-11 07:08:09.123' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/avrotblsjoin.q.out b/ql/src/test/results/clientpositive/avrotblsjoin.q.out similarity index 97% rename from ql/src/test/results/clientpositive/llap/avrotblsjoin.q.out rename to ql/src/test/results/clientpositive/avrotblsjoin.q.out index 06b06c7687..3e6969abb8 100644 --- a/ql/src/test/results/clientpositive/llap/avrotblsjoin.q.out +++ b/ql/src/test/results/clientpositive/avrotblsjoin.q.out @@ -73,7 +73,7 @@ POSTHOOK: Lineage: table1_1.col1 SCRIPT [] POSTHOOK: Lineage: table1_1.col2 SCRIPT [] WARNING: Comparing a bigint and a string may result in a loss of precision. WARNING: Comparing a bigint and a string may result in a loss of precision. 
-Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select table1_n1.col1, table1_1.* from table1_n1 join table1_1 on table1_n1.col1=table1_1.col1 where table1_1.col1="1" PREHOOK: type: QUERY PREHOOK: Input: default@table1_1 diff --git a/ql/src/test/results/clientpositive/llap/binarysortable_1.q.out b/ql/src/test/results/clientpositive/binarysortable_1.q.out similarity index 52% rename from ql/src/test/results/clientpositive/llap/binarysortable_1.q.out rename to ql/src/test/results/clientpositive/binarysortable_1.q.out index 3240ffcbca..29faaf3f3a 100644 --- a/ql/src/test/results/clientpositive/llap/binarysortable_1.q.out +++ b/ql/src/test/results/clientpositive/binarysortable_1.q.out @@ -46,57 +46,47 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: mytable_n0 - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(value) - keys: key (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: 
vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: mytable_n0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial + aggregations: sum(value) + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: regexp_replace(regexp_replace(regexp_replace(_col0, '', '^A'), '', '^@'), '', '^B') (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: regexp_replace(regexp_replace(regexp_replace(_col0, '', '^A'), '', '^@'), '', '^B') (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -123,12 +113,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@mytable_n0 #### A masked pattern was here #### ^@^@^@ 7.0 -^Atest^A 3.0 -test^@^@^A^Atest 6.0 -test^@test 4.0 ^@^A^@ 9.0 ^@test^@ 2.0 ^A^@^A 10.0 ^A^A^A 8.0 +^Atest^A 3.0 a^@bc^A^B^A^@ 1.0 +test^@^@^A^Atest 6.0 +test^@test 4.0 test^Atest 5.0 diff --git a/ql/src/test/results/clientpositive/llap/bool_unknown.q.out b/ql/src/test/results/clientpositive/bool_unknown.q.out similarity index 91% rename from ql/src/test/results/clientpositive/llap/bool_unknown.q.out rename to ql/src/test/results/clientpositive/bool_unknown.q.out index e34deaac7e..8e9b48ccaf 100644 --- a/ql/src/test/results/clientpositive/llap/bool_unknown.q.out +++ b/ql/src/test/results/clientpositive/bool_unknown.q.out @@ -52,8 +52,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### NULL -Warning: Shuffle Join MERGEJOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[21][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select assert_true((select cast(null as boolean)) is unknown) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table diff --git a/ql/src/test/results/clientpositive/llap/bucket1.q.out b/ql/src/test/results/clientpositive/bucket1.q.out similarity index 50% rename from 
ql/src/test/results/clientpositive/llap/bucket1.q.out rename to ql/src/test/results/clientpositive/bucket1.q.out index e2464f587d..6e8965ff79 100644 --- a/ql/src/test/results/clientpositive/llap/bucket1.q.out +++ b/ql/src/test/results/clientpositive/bucket1.q.out @@ -22,186 +22,153 @@ OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`src` STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: #### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + Path -> Partition: #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns 
key,value - columns.comments 'default','default' - columns.types string:string + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 1 + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count 100 - bucket_field_name key - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - name default.bucket1_1 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucket1_1 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count 100 + bucket_field_name key + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket1_1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num 
rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct) - auto parallelism: false - Reducer 3 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 + name default.bucket1_1 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucket1_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket1_1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked 
pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Dependency Collection + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -233,7 +200,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket1_1 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: #### A masked pattern was here #### @@ -243,6 +210,76 @@ STAGE PLANS: Table: default.bucket1_1 Is Table Level Stats: true + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucket1_1 select * from src PREHOOK: type: QUERY diff --git 
a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_1.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out similarity index 56% rename from ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_1.q.out rename to ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out index 111d1f8b0f..6b4ac4cdbc 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_1.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out @@ -52,38 +52,29 @@ POSTHOOK: Input: default@test_table1_n5@ds=1 POSTHOOK: Output: default@test_table2_n5@ds=1 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (ds = '1') (type: boolean) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2_n5 - Execution mode: llap - LLAP IO: no inputs - - Stage: Stage-2 - Dependency Collection + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (ds = '1') (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 
key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2_n5 Stage: Stage-0 Move Operator @@ -97,7 +88,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2_n5 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -178,38 +169,29 @@ POSTHOOK: Input: default@test_table1_n5@ds=1 POSTHOOK: Output: default@test_table2_n5@ds=1 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (ds = '1') (type: boolean) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2_n5 - Execution mode: llap - LLAP IO: no inputs - - Stage: Stage-2 - 
Dependency Collection + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (ds = '1') (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2_n5 Stage: Stage-0 Move Operator @@ -223,7 +205,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2_n5 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -304,38 +286,29 @@ POSTHOOK: Input: default@test_table1_n5@ds=1 POSTHOOK: Output: default@test_table2_n5@ds=1 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (ds = '1') (type: boolean) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), concat(value, value) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2_n5 - Execution mode: llap - LLAP IO: no inputs - - Stage: Stage-2 - Dependency Collection + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (ds = '1') (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), concat(value, value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2_n5 Stage: Stage-0 Move Operator @@ -349,7 +322,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2_n5 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -379,94 +352,59 @@ POSTHOOK: Input: default@test_table1_n5@ds=1 POSTHOOK: Output: default@test_table2_n5@ds=1 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (ds = '1') (type: boolean) - Statistics: Num rows: 
500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: (key + key) (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (ds = '1') (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (key + key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2_n5 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) - outputColumnNames: key, value, ds - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE - Group 
By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2_n5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -480,7 +418,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2_n5 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -488,6 +426,36 @@ STAGE PLANS: Column Types: int, string Table: default.test_table2_n5 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 
(type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2_n5 PARTITION (ds = '1') SELECT x.k1, concat(x.v1, x.v1) from @@ -510,38 +478,29 @@ POSTHOOK: Input: default@test_table1_n5@ds=1 POSTHOOK: Output: default@test_table2_n5@ds=1 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (ds = '1') (type: boolean) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), concat(value, value) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2_n5 - Execution mode: llap - LLAP IO: no inputs - - Stage: Stage-2 - Dependency Collection + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (ds = '1') 
(type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), concat(value, value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2_n5 Stage: Stage-0 Move Operator @@ -555,7 +514,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2_n5 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_3.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out similarity index 61% rename from ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_3.q.out rename to ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out index b635dde797..0c7826a7a1 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_3.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out @@ -48,38 +48,29 @@ POSTHOOK: Input: default@test_table1_n19@ds=1 POSTHOOK: Output: default@test_table2_n18@ds=1 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (ds = '1') (type: boolean) - Statistics: Num rows: 500 Data 
size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string), key (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2_n18 - Execution mode: llap - LLAP IO: no inputs - - Stage: Stage-2 - Dependency Collection + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (ds = '1') (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string), key (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2_n18 Stage: Stage-0 Move Operator @@ -93,7 +84,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2_n18 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -176,94 +167,59 @@ POSTHOOK: Input: default@test_table1_n19@ds=1 POSTHOOK: Output: default@test_table2_n18@ds=1 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + 
Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (ds = '1') (type: boolean) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: CAST( key AS STRING) (type: string), UDFToInteger(value) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0, _col1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (ds = '1') (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( key AS STRING) (type: string), UDFToInteger(value) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE - table: - 
input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2_n18 - Select Operator - expressions: _col0 (type: string), _col1 (type: int), '1' (type: string) - outputColumnNames: value, key, ds - Statistics: Num rows: 500 Data size: 136500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') - keys: ds (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection + value expressions: _col0 
(type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2_n18 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), '1' (type: string) + outputColumnNames: value, key, ds + Statistics: Num rows: 500 Data size: 136500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + keys: ds (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -277,7 +233,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2_n18 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -285,6 +241,36 @@ STAGE PLANS: Column Types: string, int Table: default.test_table2_n18 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column 
stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table2_n18 PARTITION (ds = '1') SELECT x.key, x.value from (SELECT a.key, a.value FROM test_table1_n19 a WHERE a.ds = '1')x diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out new file mode 100644 index 0000000000..eda8215d4c --- /dev/null +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out @@ -0,0 +1,470 @@ +PREHOOK: query: CREATE TABLE test_table1_n16 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1_n16 +POSTHOOK: query: CREATE TABLE test_table1_n16 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1_n16 +PREHOOK: query: CREATE TABLE test_table2_n15 (key INT, value STRING) 
PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2_n15 +POSTHOOK: query: CREATE TABLE test_table2_n15 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2_n15 +PREHOOK: query: CREATE TABLE test_table3_n8 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key2) SORTED BY (key2) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table3_n8 +POSTHOOK: query: CREATE TABLE test_table3_n8 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key2) SORTED BY (key2) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table3_n8 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1_n16 PARTITION (ds = '1') SELECT * where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1_n16@ds=1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1_n16 PARTITION (ds = '1') SELECT * where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1_n16@ds=1 +POSTHOOK: Lineage: test_table1_n16 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1_n16 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table2_n15 PARTITION (ds = '1') SELECT * where key < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table2_n15@ds=1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table2_n15 PARTITION (ds = '1') SELECT * where key < 100 +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table2_n15@ds=1 +POSTHOOK: Lineage: test_table2_n15 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2_n15 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') +SELECT a.key, a.key, concat(a.value, b.value) +FROM test_table1_n16 a JOIN test_table2_n15 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1_n16 +PREHOOK: Input: default@test_table1_n16@ds=1 +PREHOOK: Input: default@test_table2_n15 +PREHOOK: Input: default@test_table2_n15@ds=1 +PREHOOK: Output: default@test_table3_n8@ds=1 +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') +SELECT a.key, a.key, concat(a.value, b.value) +FROM test_table1_n16 a JOIN test_table2_n15 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1_n16 +POSTHOOK: Input: default@test_table1_n16@ds=1 +POSTHOOK: Input: default@test_table2_n15 +POSTHOOK: Input: default@test_table2_n15@ds=1 +POSTHOOK: Output: default@test_table3_n8@ds=1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((ds = '1') and key is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 
930 Basic stats: COMPLETE Column stats: COMPLETE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) + outputColumnNames: _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: int) + value expressions: _col2 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3_n8 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3_n8 + + Stage: Stage-2 + Stats Work + Basic 
Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3_n8 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') +SELECT a.key, a.key, concat(a.value, b.value) +FROM test_table1_n16 a JOIN test_table2_n15 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1_n16 +PREHOOK: Input: default@test_table1_n16@ds=1 +PREHOOK: Input: default@test_table2_n15 +PREHOOK: Input: default@test_table2_n15@ds=1 +PREHOOK: Output: default@test_table3_n8@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') +SELECT a.key, a.key, concat(a.value, b.value) +FROM test_table1_n16 a JOIN test_table2_n15 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1_n16 +POSTHOOK: Input: default@test_table1_n16@ds=1 +POSTHOOK: Input: default@test_table2_n15 +POSTHOOK: Input: default@test_table2_n15@ds=1 +POSTHOOK: 
Output: default@test_table3_n8@ds=1 +POSTHOOK: Lineage: test_table3_n8 PARTITION(ds=1).key SIMPLE [(test_table1_n16)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3_n8 PARTITION(ds=1).key2 SIMPLE [(test_table1_n16)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3_n8 PARTITION(ds=1).value EXPRESSION [(test_table1_n16)a.FieldSchema(name:value, type:string, comment:null), (test_table2_n15)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from test_table3_n8 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3_n8 +PREHOOK: Input: default@test_table3_n8@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_table3_n8 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3_n8 +POSTHOOK: Input: default@test_table3_n8@ds=1 +#### A masked pattern was here #### +2 2 val_2val_2 1 +PREHOOK: query: select * from test_table3_n8 tablesample (bucket 2 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3_n8 +PREHOOK: Input: default@test_table3_n8@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_table3_n8 tablesample (bucket 2 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3_n8 +POSTHOOK: Input: default@test_table3_n8@ds=1 +#### A masked pattern was here #### +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +4 4 val_4val_4 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +8 8 val_8val_8 1 +9 9 val_9val_9 1 +PREHOOK: query: DROP TABLE test_table3_n8 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table3_n8 +PREHOOK: Output: 
default@test_table3_n8 +POSTHOOK: query: DROP TABLE test_table3_n8 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table3_n8 +POSTHOOK: Output: default@test_table3_n8 +PREHOOK: query: CREATE TABLE test_table3_n8 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table3_n8 +POSTHOOK: query: CREATE TABLE test_table3_n8 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table3_n8 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') +SELECT a.key, a.value +FROM test_table1_n16 a JOIN test_table2_n15 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1_n16 +PREHOOK: Input: default@test_table1_n16@ds=1 +PREHOOK: Input: default@test_table2_n15 +PREHOOK: Input: default@test_table2_n15@ds=1 +PREHOOK: Output: default@test_table3_n8@ds=1 +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') +SELECT a.key, a.value +FROM test_table1_n16 a JOIN test_table2_n15 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1_n16 +POSTHOOK: Input: default@test_table1_n16@ds=1 +POSTHOOK: Input: default@test_table2_n15 +POSTHOOK: Input: default@test_table2_n15@ds=1 +POSTHOOK: Output: default@test_table3_n8@ds=1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((ds = '1') and key is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE 
+ Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3_n8 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
name: default.test_table3_n8 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3_n8 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') +SELECT a.key, a.value +FROM test_table1_n16 a JOIN test_table2_n15 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1_n16 +PREHOOK: Input: default@test_table1_n16@ds=1 +PREHOOK: Input: default@test_table2_n15 +PREHOOK: Input: default@test_table2_n15@ds=1 +PREHOOK: Output: default@test_table3_n8@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') +SELECT a.key, a.value +FROM test_table1_n16 a JOIN test_table2_n15 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1_n16 +POSTHOOK: Input: default@test_table1_n16@ds=1 +POSTHOOK: Input: default@test_table2_n15 +POSTHOOK: Input: default@test_table2_n15@ds=1 +POSTHOOK: Output: default@test_table3_n8@ds=1 +POSTHOOK: Lineage: test_table3_n8 
PARTITION(ds=1).key SIMPLE [(test_table1_n16)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3_n8 PARTITION(ds=1).value SIMPLE [(test_table1_n16)a.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from test_table3_n8 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3_n8 +PREHOOK: Input: default@test_table3_n8@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_table3_n8 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3_n8 +POSTHOOK: Input: default@test_table3_n8@ds=1 +#### A masked pattern was here #### +0 val_0 1 +0 val_0 1 +0 val_0 1 +0 val_0 1 +0 val_0 1 +0 val_0 1 +0 val_0 1 +0 val_0 1 +0 val_0 1 +2 val_2 1 +4 val_4 1 +8 val_8 1 +9 val_9 1 +PREHOOK: query: select * from test_table3_n8 tablesample (bucket 2 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3_n8 +PREHOOK: Input: default@test_table3_n8@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_table3_n8 tablesample (bucket 2 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3_n8 +POSTHOOK: Input: default@test_table3_n8@ds=1 +#### A masked pattern was here #### +5 val_5 1 +5 val_5 1 +5 val_5 1 +5 val_5 1 +5 val_5 1 +5 val_5 1 +5 val_5 1 +5 val_5 1 +5 val_5 1 +PREHOOK: query: DROP TABLE test_table3_n8 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table3_n8 +PREHOOK: Output: default@test_table3_n8 +POSTHOOK: query: DROP TABLE test_table3_n8 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table3_n8 +POSTHOOK: Output: default@test_table3_n8 diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out new file mode 100644 index 0000000000..9208b93482 --- /dev/null +++ 
b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out @@ -0,0 +1,458 @@ +PREHOOK: query: CREATE TABLE test_table1_n8 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1_n8 +POSTHOOK: query: CREATE TABLE test_table1_n8 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1_n8 +PREHOOK: query: CREATE TABLE test_table2_n8 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2_n8 +POSTHOOK: query: CREATE TABLE test_table2_n8 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2_n8 +PREHOOK: query: CREATE TABLE test_table3_n5 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table3_n5 +POSTHOOK: query: CREATE TABLE test_table3_n5 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table3_n5 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1_n8 PARTITION (ds = '1') SELECT * where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1_n8@ds=1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1_n8 PARTITION (ds = '1') SELECT * where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: 
Output: default@test_table1_n8@ds=1 +POSTHOOK: Lineage: test_table1_n8 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1_n8 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table2_n8 PARTITION (ds = '1') SELECT * where key < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table2_n8@ds=1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table2_n8 PARTITION (ds = '1') SELECT * where key < 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table2_n8@ds=1 +POSTHOOK: Lineage: test_table2_n8 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2_n8 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') +SELECT a.key, concat(a.value, b.value) +FROM test_table1_n8 a JOIN test_table2_n8 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1_n8 +PREHOOK: Input: default@test_table1_n8@ds=1 +PREHOOK: Input: default@test_table2_n8 +PREHOOK: Input: default@test_table2_n8@ds=1 +PREHOOK: Output: default@test_table3_n5@ds=1 +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') +SELECT a.key, concat(a.value, b.value) +FROM test_table1_n8 a JOIN test_table2_n8 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1_n8 +POSTHOOK: Input: default@test_table1_n8@ds=1 +POSTHOOK: Input: default@test_table2_n8 +POSTHOOK: Input: default@test_table2_n8@ds=1 +POSTHOOK: Output: default@test_table3_n5@ds=1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 
depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((ds = '1') and key is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: - + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3_n5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3_n5 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3_n5 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') +SELECT a.key, concat(a.value, b.value) +FROM test_table1_n8 a JOIN test_table2_n8 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1_n8 +PREHOOK: Input: default@test_table1_n8@ds=1 +PREHOOK: Input: default@test_table2_n8 +PREHOOK: Input: default@test_table2_n8@ds=1 +PREHOOK: Output: default@test_table3_n5@ds=1 
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') +SELECT a.key, concat(a.value, b.value) +FROM test_table1_n8 a JOIN test_table2_n8 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1_n8 +POSTHOOK: Input: default@test_table1_n8@ds=1 +POSTHOOK: Input: default@test_table2_n8 +POSTHOOK: Input: default@test_table2_n8@ds=1 +POSTHOOK: Output: default@test_table3_n5@ds=1 +POSTHOOK: Lineage: test_table3_n5 PARTITION(ds=1).key SIMPLE [(test_table1_n8)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3_n5 PARTITION(ds=1).value EXPRESSION [(test_table1_n8)a.FieldSchema(name:value, type:string, comment:null), (test_table2_n8)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from test_table3_n5 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3_n5 +PREHOOK: Input: default@test_table3_n5@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_table3_n5 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3_n5 +POSTHOOK: Input: default@test_table3_n5@ds=1 +#### A masked pattern was here #### +2 val_2val_2 1 +PREHOOK: query: select * from test_table3_n5 tablesample (bucket 2 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3_n5 +PREHOOK: Input: default@test_table3_n5@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_table3_n5 tablesample (bucket 2 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3_n5 +POSTHOOK: Input: default@test_table3_n5@ds=1 +#### A masked pattern was here #### +9 val_9val_9 1 +8 val_8val_8 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +4 val_4val_4 1 +0 val_0val_0 1 +0 val_0val_0 
1 +0 val_0val_0 1 +0 val_0val_0 1 +0 val_0val_0 1 +0 val_0val_0 1 +0 val_0val_0 1 +0 val_0val_0 1 +0 val_0val_0 1 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') +SELECT a.key, concat(a.value, b.value) +FROM +(select key, value from test_table1_n8 where ds = '1') a +JOIN +(select key, value from test_table2_n8 where ds = '1') b +ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1_n8 +PREHOOK: Input: default@test_table1_n8@ds=1 +PREHOOK: Input: default@test_table2_n8 +PREHOOK: Input: default@test_table2_n8@ds=1 +PREHOOK: Output: default@test_table3_n5@ds=1 +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') +SELECT a.key, concat(a.value, b.value) +FROM +(select key, value from test_table1_n8 where ds = '1') a +JOIN +(select key, value from test_table2_n8 where ds = '1') b +ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1_n8 +POSTHOOK: Input: default@test_table1_n8@ds=1 +POSTHOOK: Input: default@test_table2_n8 +POSTHOOK: Input: default@test_table2_n8@ds=1 +POSTHOOK: Output: default@test_table3_n5@ds=1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_table1_n8 + filterExpr: ((ds = '1') and key is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 
(type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: - + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3_n5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3_n5 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3_n5 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) 
+ null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') +SELECT a.key, concat(a.value, b.value) +FROM +(select key, value from test_table1_n8 where ds = '1') a +JOIN +(select key, value from test_table2_n8 where ds = '1') b +ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1_n8 +PREHOOK: Input: default@test_table1_n8@ds=1 +PREHOOK: Input: default@test_table2_n8 +PREHOOK: Input: default@test_table2_n8@ds=1 +PREHOOK: Output: default@test_table3_n5@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') +SELECT a.key, concat(a.value, b.value) +FROM +(select key, value from test_table1_n8 where ds = '1') a +JOIN +(select key, value from test_table2_n8 where ds = '1') b +ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1_n8 +POSTHOOK: Input: default@test_table1_n8@ds=1 +POSTHOOK: Input: default@test_table2_n8 +POSTHOOK: Input: default@test_table2_n8@ds=1 +POSTHOOK: Output: default@test_table3_n5@ds=1 +POSTHOOK: Lineage: test_table3_n5 PARTITION(ds=1).key SIMPLE [(test_table1_n8)test_table1_n8.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3_n5 PARTITION(ds=1).value EXPRESSION 
[(test_table1_n8)test_table1_n8.FieldSchema(name:value, type:string, comment:null), (test_table2_n8)test_table2_n8.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from test_table3_n5 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3_n5 +PREHOOK: Input: default@test_table3_n5@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_table3_n5 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3_n5 +POSTHOOK: Input: default@test_table3_n5@ds=1 +#### A masked pattern was here #### +2 val_2val_2 1 +PREHOOK: query: select * from test_table3_n5 tablesample (bucket 2 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3_n5 +PREHOOK: Input: default@test_table3_n5@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_table3_n5 tablesample (bucket 2 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3_n5 +POSTHOOK: Input: default@test_table3_n5@ds=1 +#### A masked pattern was here #### +9 val_9val_9 1 +8 val_8val_8 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +5 val_5val_5 1 +4 val_4val_4 1 +0 val_0val_0 1 +0 val_0val_0 1 +0 val_0val_0 1 +0 val_0val_0 1 +0 val_0val_0 1 +0 val_0val_0 1 +0 val_0val_0 1 +0 val_0val_0 1 +0 val_0val_0 1 diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out new file mode 100644 index 0000000000..d5d7f4b391 --- /dev/null +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out @@ -0,0 +1,448 @@ +PREHOOK: query: CREATE TABLE test_table1_n2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@test_table1_n2 +POSTHOOK: query: CREATE TABLE test_table1_n2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1_n2 +PREHOOK: query: CREATE TABLE test_table2_n2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2_n2 +POSTHOOK: query: CREATE TABLE test_table2_n2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2_n2 +PREHOOK: query: CREATE TABLE test_table3_n2 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table3_n2 +POSTHOOK: query: CREATE TABLE test_table3_n2 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table3_n2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1_n2 PARTITION (ds = '1') SELECT * where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1_n2@ds=1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1_n2 PARTITION (ds = '1') SELECT * where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1_n2@ds=1 +POSTHOOK: Lineage: test_table1_n2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1_n2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ] +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table2_n2 PARTITION (ds = '1') SELECT * where key < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table2_n2@ds=1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table2_n2 PARTITION (ds = '1') SELECT * where key < 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table2_n2@ds=1 +POSTHOOK: Lineage: test_table2_n2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2_n2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') +SELECT a.key, b.key, concat(a.value, b.value) +FROM test_table1_n2 a JOIN test_table2_n2 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1_n2 +PREHOOK: Input: default@test_table1_n2@ds=1 +PREHOOK: Input: default@test_table2_n2 +PREHOOK: Input: default@test_table2_n2@ds=1 +PREHOOK: Output: default@test_table3_n2@ds=1 +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') +SELECT a.key, b.key, concat(a.value, b.value) +FROM test_table1_n2 a JOIN test_table2_n2 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1_n2 +POSTHOOK: Input: default@test_table1_n2@ds=1 +POSTHOOK: Input: default@test_table2_n2 +POSTHOOK: Input: default@test_table2_n2@ds=1 +POSTHOOK: Output: default@test_table3_n2@ds=1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((ds = '1') and key is not null) (type: boolean) + Statistics: Num rows: 10 
Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col0 (type: int), _col2 (type: int), concat(_col1, _col3) (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3_n2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3_n2 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3_n2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') +SELECT a.key, b.key, concat(a.value, b.value) +FROM test_table1_n2 a JOIN test_table2_n2 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1_n2 +PREHOOK: Input: default@test_table1_n2@ds=1 +PREHOOK: Input: default@test_table2_n2 +PREHOOK: Input: default@test_table2_n2@ds=1 +PREHOOK: Output: default@test_table3_n2@ds=1 +POSTHOOK: query: INSERT 
OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') +SELECT a.key, b.key, concat(a.value, b.value) +FROM test_table1_n2 a JOIN test_table2_n2 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1_n2 +POSTHOOK: Input: default@test_table1_n2@ds=1 +POSTHOOK: Input: default@test_table2_n2 +POSTHOOK: Input: default@test_table2_n2@ds=1 +POSTHOOK: Output: default@test_table3_n2@ds=1 +POSTHOOK: Lineage: test_table3_n2 PARTITION(ds=1).key SIMPLE [(test_table1_n2)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3_n2 PARTITION(ds=1).key2 SIMPLE [(test_table2_n2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3_n2 PARTITION(ds=1).value EXPRESSION [(test_table1_n2)a.FieldSchema(name:value, type:string, comment:null), (test_table2_n2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from test_table3_n2 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3_n2 +PREHOOK: Input: default@test_table3_n2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_table3_n2 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3_n2 +POSTHOOK: Input: default@test_table3_n2@ds=1 +#### A masked pattern was here #### +2 2 val_2val_2 1 +PREHOOK: query: select * from test_table3_n2 tablesample (bucket 2 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3_n2 +PREHOOK: Input: default@test_table3_n2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_table3_n2 tablesample (bucket 2 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3_n2 +POSTHOOK: Input: default@test_table3_n2@ds=1 +#### A masked pattern was here #### +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 
+0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +4 4 val_4val_4 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +8 8 val_8val_8 1 +9 9 val_9val_9 1 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') +SELECT b.key, a.key, concat(a.value, b.value) +FROM test_table1_n2 a JOIN test_table2_n2 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1_n2 +PREHOOK: Input: default@test_table1_n2@ds=1 +PREHOOK: Input: default@test_table2_n2 +PREHOOK: Input: default@test_table2_n2@ds=1 +PREHOOK: Output: default@test_table3_n2@ds=1 +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') +SELECT b.key, a.key, concat(a.value, b.value) +FROM test_table1_n2 a JOIN test_table2_n2 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1_n2 +POSTHOOK: Input: default@test_table1_n2@ds=1 +POSTHOOK: Input: default@test_table2_n2 +POSTHOOK: Input: default@test_table2_n2@ds=1 +POSTHOOK: Output: default@test_table3_n2@ds=1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((ds = '1') and key is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Sorted Merge Bucket Map Join Operator + 
condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col2 (type: int), _col0 (type: int), concat(_col1, _col3) (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3_n2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3_n2 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: 
key, key2, value + Column Types: int, int, string + Table: default.test_table3_n2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') +SELECT b.key, a.key, concat(a.value, b.value) +FROM test_table1_n2 a JOIN test_table2_n2 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1_n2 +PREHOOK: Input: default@test_table1_n2@ds=1 +PREHOOK: Input: default@test_table2_n2 +PREHOOK: Input: default@test_table2_n2@ds=1 +PREHOOK: Output: default@test_table3_n2@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') +SELECT b.key, a.key, concat(a.value, b.value) +FROM test_table1_n2 a JOIN test_table2_n2 b +ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1_n2 +POSTHOOK: Input: default@test_table1_n2@ds=1 +POSTHOOK: Input: default@test_table2_n2 +POSTHOOK: Input: default@test_table2_n2@ds=1 +POSTHOOK: Output: default@test_table3_n2@ds=1 +POSTHOOK: Lineage: 
test_table3_n2 PARTITION(ds=1).key SIMPLE [(test_table2_n2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3_n2 PARTITION(ds=1).key2 SIMPLE [(test_table1_n2)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3_n2 PARTITION(ds=1).value EXPRESSION [(test_table1_n2)a.FieldSchema(name:value, type:string, comment:null), (test_table2_n2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from test_table3_n2 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3_n2 +PREHOOK: Input: default@test_table3_n2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_table3_n2 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3_n2 +POSTHOOK: Input: default@test_table3_n2@ds=1 +#### A masked pattern was here #### +2 2 val_2val_2 1 +PREHOOK: query: select * from test_table3_n2 tablesample (bucket 2 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3_n2 +PREHOOK: Input: default@test_table3_n2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_table3_n2 tablesample (bucket 2 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3_n2 +POSTHOOK: Input: default@test_table3_n2@ds=1 +#### A masked pattern was here #### +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +0 0 val_0val_0 1 +4 4 val_4val_4 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +5 5 val_5val_5 1 +8 8 val_8val_8 1 +9 9 val_9val_9 1 diff --git a/ql/src/test/results/clientpositive/case_sensitivity.q.out b/ql/src/test/results/clientpositive/case_sensitivity.q.out new file mode 100644 index 0000000000..fdcf86b1da --- 
/dev/null +++ b/ql/src/test/results/clientpositive/case_sensitivity.q.out @@ -0,0 +1,168 @@ +PREHOOK: query: CREATE TABLE DEST1_n129(Key INT, VALUE STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1_n129 +POSTHOOK: query: CREATE TABLE DEST1_n129(Key INT, VALUE STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1_n129 +PREHOOK: query: EXPLAIN +FROM SRC_THRIFT +INSERT OVERWRITE TABLE dest1_n129 SELECT src_Thrift.LINT[1], src_thrift.lintstring[0].MYSTRING where src_thrift.liNT[0] > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_thrift +PREHOOK: Output: default@dest1_n129 +POSTHOOK: query: EXPLAIN +FROM SRC_THRIFT +INSERT OVERWRITE TABLE dest1_n129 SELECT src_Thrift.LINT[1], src_thrift.lintstring[0].MYSTRING where src_thrift.liNT[0] > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_thrift +POSTHOOK: Output: default@dest1_n129 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src_thrift + filterExpr: (lint[0] > 0) (type: boolean) + Statistics: Num rows: 11 Data size: 29480 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (lint[0] > 0) (type: boolean) + Statistics: Num rows: 3 Data size: 8039 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lint[1] (type: int), lintstring[0].mystring (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 8039 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 8039 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n129 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 8039 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 3544 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n129 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, 
string + Table: default.dest1_n129 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n129 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n129 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: FROM SRC_THRIFT +INSERT OVERWRITE TABLE dest1_n129 SELECT src_Thrift.LINT[1], src_thrift.lintstring[0].MYSTRING where src_thrift.liNT[0] > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_thrift +PREHOOK: Output: default@dest1_n129 +POSTHOOK: query: FROM SRC_THRIFT +INSERT OVERWRITE TABLE dest1_n129 SELECT src_Thrift.LINT[1], src_thrift.lintstring[0].MYSTRING where src_thrift.liNT[0] > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_thrift +POSTHOOK: Output: default@dest1_n129 +POSTHOOK: Lineage: dest1_n129.key EXPRESSION [(src_thrift)src_thrift.FieldSchema(name:lint, type:array, comment:from deserializer), ] +POSTHOOK: Lineage: dest1_n129.value EXPRESSION [(src_thrift)src_thrift.FieldSchema(name:lintstring, type:array>, comment:from deserializer), ] +PREHOOK: query: SELECT DEST1_n129.* FROM Dest1_n129 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n129 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1_n129.* FROM Dest1_n129 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n129 +#### A masked pattern was here #### +2 1 +4 8 +6 27 +8 64 +10 125 +12 216 +14 343 +16 512 +18 729 diff --git 
a/ql/src/test/results/clientpositive/cast1.q.out b/ql/src/test/results/clientpositive/cast1.q.out new file mode 100644 index 0000000000..c4e76f59ec --- /dev/null +++ b/ql/src/test/results/clientpositive/cast1.q.out @@ -0,0 +1,161 @@ +PREHOOK: query: CREATE TABLE dest1_n151(c1 INT, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 INT, c6 STRING, c7 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n151 +POSTHOOK: query: CREATE TABLE dest1_n151(c1 INT, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 INT, c6 STRING, c7 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n151 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1_n151 SELECT 3 + 2, 3.0 + 2, 3 + 2.0, 3.0 + 2.0, 3 + CAST(2.0 AS INT) + CAST(CAST(0 AS SMALLINT) AS INT), CAST(1 AS BOOLEAN), CAST(TRUE AS INT) WHERE src.key = 86 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n151 +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1_n151 SELECT 3 + 2, 3.0 + 2, 3 + 2.0, 3.0 + 2.0, 3 + CAST(2.0 AS INT) + CAST(CAST(0 AS SMALLINT) AS INT), CAST(1 AS BOOLEAN), CAST(TRUE AS INT) WHERE src.key = 86 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n151 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) = 86.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) = 86.0D) (type: boolean) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column 
stats: COMPLETE + Select Operator + expressions: 5 (type: int), 5.0D (type: double), 5.0D (type: double), 5.0D (type: double), 5 (type: int), 'TRUE' (type: string), 1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 31000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 31000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n151 + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: string), _col6 (type: int) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7 + Statistics: Num rows: 250 Data size: 31000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 2984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), 
compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n151 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7 + Column Types: int, double, double, double, int, string, int + Table: default.dest1_n151 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n151 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n151 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: FROM src INSERT 
OVERWRITE TABLE dest1_n151 SELECT 3 + 2, 3.0 + 2, 3 + 2.0, 3.0 + 2.0, 3 + CAST(2.0 AS INT) + CAST(CAST(0 AS SMALLINT) AS INT), CAST(1 AS BOOLEAN), CAST(TRUE AS INT) WHERE src.key = 86 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n151 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n151 SELECT 3 + 2, 3.0 + 2, 3 + 2.0, 3.0 + 2.0, 3 + CAST(2.0 AS INT) + CAST(CAST(0 AS SMALLINT) AS INT), CAST(1 AS BOOLEAN), CAST(TRUE AS INT) WHERE src.key = 86 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n151 +POSTHOOK: Lineage: dest1_n151.c1 SIMPLE [] +POSTHOOK: Lineage: dest1_n151.c2 EXPRESSION [] +POSTHOOK: Lineage: dest1_n151.c3 EXPRESSION [] +POSTHOOK: Lineage: dest1_n151.c4 EXPRESSION [] +POSTHOOK: Lineage: dest1_n151.c5 SIMPLE [] +POSTHOOK: Lineage: dest1_n151.c6 EXPRESSION [] +POSTHOOK: Lineage: dest1_n151.c7 SIMPLE [] +PREHOOK: query: select dest1_n151.* FROM dest1_n151 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n151 +#### A masked pattern was here #### +POSTHOOK: query: select dest1_n151.* FROM dest1_n151 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n151 +#### A masked pattern was here #### +5 5.0 5.0 5.0 5 TRUE 1 diff --git a/ql/src/test/results/clientpositive/llap/cast_datetime_with_sql_2016_format.q.out b/ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out similarity index 95% rename from ql/src/test/results/clientpositive/llap/cast_datetime_with_sql_2016_format.q.out rename to ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out index bfedc77d6a..f245121c48 100644 --- a/ql/src/test/results/clientpositive/llap/cast_datetime_with_sql_2016_format.q.out +++ b/ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out @@ -263,9 +263,11 @@ STAGE PLANS: Processor Tree: TableScan alias: strings + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 
CAST( s AS timestamp FORMAT 'yyy.mm.dd' ) (type: timestamp) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain from strings select cast (s as date format "yyy.mm.dd") @@ -286,9 +288,11 @@ STAGE PLANS: Processor Tree: TableScan alias: strings + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( s AS date FORMAT 'yyy.mm.dd' ) (type: date) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain from timestamp1 select cast (t as string format "yyyy") @@ -309,9 +313,11 @@ STAGE PLANS: Processor Tree: TableScan alias: timestamp1 + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( t AS string FORMAT 'yyyy' ) (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain from timestamp1 select cast (t as varchar(12) format "yyyy") @@ -332,9 +338,11 @@ STAGE PLANS: Processor Tree: TableScan alias: timestamp1 + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( t AS varchar(12) FORMAT 'yyyy' ) (type: varchar(12)) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select diff --git a/ql/src/test/results/clientpositive/cast_on_constant.q.out b/ql/src/test/results/clientpositive/cast_on_constant.q.out new file mode 100644 index 0000000000..0279796f03 --- /dev/null +++ b/ql/src/test/results/clientpositive/cast_on_constant.q.out @@ -0,0 +1,226 @@ +PREHOOK: query: create table t1_n138(ts_field timestamp, date_field date) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1_n138 +POSTHOOK: 
query: create table t1_n138(ts_field timestamp, date_field date) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1_n138 +PREHOOK: query: explain select * from t1_n138 where ts_field = "2016-01-23 00:00:00" +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n138 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from t1_n138 where ts_field = "2016-01-23 00:00:00" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n138 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n138 + filterExpr: (ts_field = TIMESTAMP'2016-01-23 00:00:00') (type: boolean) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ts_field = TIMESTAMP'2016-01-23 00:00:00') (type: boolean) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: TIMESTAMP'2016-01-23 00:00:00' (type: timestamp), date_field (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from t1_n138 where date_field = "2016-01-23" +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n138 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from t1_n138 where date_field = "2016-01-23" +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@t1_n138 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n138 + filterExpr: (date_field = DATE'2016-01-23') (type: boolean) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (date_field = DATE'2016-01-23') (type: boolean) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts_field (type: timestamp), DATE'2016-01-23' (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from t1_n138 where ts_field = timestamp '2016-01-23 00:00:00' +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n138 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from t1_n138 where ts_field = timestamp '2016-01-23 00:00:00' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n138 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n138 + filterExpr: (ts_field = TIMESTAMP'2016-01-23 00:00:00') (type: boolean) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ts_field = TIMESTAMP'2016-01-23 00:00:00') (type: boolean) + 
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: TIMESTAMP'2016-01-23 00:00:00' (type: timestamp), date_field (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from t1_n138 where date_field = date '2016-01-23' +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n138 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from t1_n138 where date_field = date '2016-01-23' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n138 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n138 + filterExpr: (date_field = DATE'2016-01-23') (type: boolean) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (date_field = DATE'2016-01-23') (type: boolean) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts_field (type: timestamp), DATE'2016-01-23' (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from t1_n138 where date_field = ts_field +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n138 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from t1_n138 where date_field = ts_field +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n138 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n138 + filterExpr: (CAST( date_field AS TIMESTAMP) = ts_field) (type: boolean) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (CAST( date_field AS TIMESTAMP) = ts_field) (type: boolean) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts_field (type: timestamp), date_field (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table t1_n138 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1_n138 +PREHOOK: Output: default@t1_n138 +POSTHOOK: query: drop table t1_n138 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1_n138 +POSTHOOK: Output: default@t1_n138 diff --git 
a/ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out b/ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out new file mode 100644 index 0000000000..68a885026c --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out @@ -0,0 +1,1298 @@ +PREHOOK: query: create table s_n3 as select * from src limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@s_n3 +POSTHOOK: query: create table s_n3 as select * from src limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@s_n3 +POSTHOOK: Lineage: s_n3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: s_n3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@s_n3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s_n3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + 
TableScan + alias: b + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@s_n3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s_n3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@s_n3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +limit 5 +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@s_n3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: b + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s_n3 a 
+union all +select key from s_n3 b +order by key +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@s_n3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +order by key +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s_n3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + TableScan + alias: b + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: 
COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: explain +select * from( +select src1.key, src2.value +from src src1 left outer join src src2 +on src1.key = src2.key +limit 10)subq1 +union all +select * from( +select src1.key, src2.value +from src src1 left outer join src src2 +on src1.key = src2.key +limit 10)subq2 +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from( +select src1.key, src2.value +from src src1 left outer join src src2 +on src1.key = src2.key +limit 10)subq1 +union all +select * from( +select src1.key, src2.value +from src src1 left outer join src src2 +on src1.key = src2.key +limit 10)subq2 +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-5 is a root stage + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: 
vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: 
COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@s_n3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s_n3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output 
Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: b + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@s_n3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s_n3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@s_n3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s_n3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num 
rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +order by key +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@s_n3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +order by key +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s_n3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + 
Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + TableScan + Union + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num 
rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: explain +select * from( +select src1.key, src2.value +from src src1 left outer join src src2 +on src1.key = src2.key +limit 10)subq1 +union all +select * from( +select src1.key, src2.value +from src src1 left outer join src src2 +on src1.key = src2.key +limit 10)subq2 +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from( +select src1.key, src2.value +from src src1 left outer join src src2 +on src1.key = src2.key +limit 10)subq1 +union all +select * from( +select src1.key, src2.value +from src src1 left outer join src src2 +on src1.key = src2.key +limit 10)subq2 +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 
Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator 
Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 12 Data size: 1772 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1772 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 10 Data size: 1598 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 708 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 10 Data size: 1598 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 12 Data size: 1772 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1772 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@s_n3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s_n3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 20 Data size: 1740 
Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: b + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@s_n3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select key from s_n3 a +union all +select key from s_n3 b +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s_n3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 870 Basic stats: 
COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: b + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/cbo_const.q.out b/ql/src/test/results/clientpositive/cbo_const.q.out similarity index 53% rename from ql/src/test/results/clientpositive/llap/cbo_const.q.out rename to ql/src/test/results/clientpositive/cbo_const.q.out index 0361c93c8b..33e6f97f2d 100644 --- 
a/ql/src/test/results/clientpositive/llap/cbo_const.q.out +++ b/ql/src/test/results/clientpositive/cbo_const.q.out @@ -19,7 +19,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 2 01:02:03.000000000 2 01:02:03.000000000 2 01:02:03.000000000 2 01:02:03.000000000 true -Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -116,19 +116,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(1,2,3) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src - Select Operator - expressions: array(1,2,3) (type: array) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: EXPLAIN select key from (SELECT key from src where key = 1+3)s @@ -141,22 +156,38 @@ POSTHOOK: type: QUERY 
POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) = 4.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) = 4.0D) (type: boolean) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) = 4.0D) (type: boolean) - Filter Operator - predicate: (UDFToDouble(key) = 4.0D) (type: boolean) - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: select * from (select key from src where key = '1')subq PREHOOK: type: QUERY @@ -212,122 +243,113 @@ POSTHOOK: Input: default@srcpart #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### 
- Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: ((UDFToDouble(key) = 3.0D) and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) = 3.0D) and value is not null) (type: boolean) - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (UDFToDouble(hr) = 14.0D) and value is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 452 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((ds = '2008-04-08') and (UDFToDouble(hr) = 14.0D) and value is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 452 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: unknown - Map 5 - Map Operator Tree: - TableScan - alias: y - filterExpr: (UDFToDouble(key) = 3.0D) (type: boolean) - Statistics: Num rows: 500 
Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) = 3.0D) (type: boolean) - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: ((UDFToDouble(key) = 3.0D) and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) = 3.0D) and value is not null) (type: boolean) + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + TableScan + alias: z + filterExpr: ((ds = 
'2008-04-08') and (UDFToDouble(hr) = 14.0D) and value is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 452 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((ds = '2008-04-08') and (UDFToDouble(hr) = 14.0D) and value is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 452 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col4 - Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE 
Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) + TableScan + alias: y + filterExpr: (UDFToDouble(key) = 3.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) = 3.0D) (type: boolean) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col4 + Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 
1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/cbo_input26.q.out b/ql/src/test/results/clientpositive/cbo_input26.q.out new file mode 100644 index 0000000000..770583505d --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_input26.q.out @@ -0,0 +1,668 @@ +PREHOOK: query: explain +select * from ( + select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from ( + select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((ds = '2008-04-08') and (hr = '11')) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 6 Data size: 1574 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 2148 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 2148 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 6 Data size: 1574 Basic stats: COMPLETE Column stats: COMPLETE + 
Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 2148 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 2148 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: b + filterExpr: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 
'14' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 254 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from ( + select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from ( + select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +0 val_0 2008-04-08 11 +0 val_0 2008-04-08 11 +0 val_0 2008-04-08 11 +10 val_10 2008-04-08 11 +100 val_100 2008-04-08 11 +PREHOOK: query: explain +select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order 
by a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((ds = '2008-04-08') and (hr = '11')) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 6 Data 
size: 1035 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '2008-04-08' (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 6 Data size: 1035 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '2008-04-08' (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: b + filterExpr: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) + Statistics: Num rows: 1 Data size: 452 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) + Statistics: Num rows: 1 Data size: 452 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), '14' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +2008-04-08 0 11 +2008-04-08 0 11 +2008-04-08 0 11 
+2008-04-08 10 11 +2008-04-08 100 11 +PREHOOK: query: explain +select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((ds = '2008-04-08') and (hr = '11')) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + 
Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 6 Data size: 1035 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '2008-04-08' (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 6 Data size: 1035 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '2008-04-08' (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator 
Tree: + TableScan + alias: b + filterExpr: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) + Statistics: Num rows: 1 Data size: 452 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) + Statistics: Num rows: 1 Data size: 452 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), '14' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 
5)pb +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +2008-04-08 0 11 +2008-04-08 0 11 +2008-04-08 0 11 +2008-04-08 10 11 +2008-04-08 100 11 +PREHOOK: query: explain +select * from ( + select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa + union all + select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from ( + select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa + union all + select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((ds = '2008-04-08') and (hr = '11')) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select 
Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 6 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 6 Data size: 1058 Basic stats: COMPLETE Column 
stats: COMPLETE + Select Operator + expressions: _col0 (type: string), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: b + filterExpr: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from ( + select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa + union all + select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from ( + select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa + union all + select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +165 2008-04-08 val_165 +27 2008-04-08 val_27 +311 2008-04-08 val_311 +86 2008-04-08 val_86 +238 2008-04-08 val_238 diff --git a/ql/src/test/results/clientpositive/llap/cbo_ppd_non_deterministic.q.out b/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out similarity index 65% rename from ql/src/test/results/clientpositive/llap/cbo_ppd_non_deterministic.q.out rename to ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out index 80f8cec868..d90ce88bf0 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_ppd_non_deterministic.q.out +++ b/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out @@ -111,27 +111,44 @@ POSTHOOK: Input: default@testa POSTHOOK: Input: default@testa@part1=CA/part2=ABC/part3=300 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a 
root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: testa + filterExpr: ((part1 = 'CA') and (part2 = 'ABC')) (type: boolean) + Statistics: Num rows: 2 Data size: 5106 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rand() (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col0 <= 0.5D) and (_col0 > 0.25D)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'CA' (type: string), _col0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: 20 Processor Tree: - TableScan - alias: testa - filterExpr: ((part1 = 'CA') and (part2 = 'ABC')) (type: boolean) - Select Operator - expressions: rand() (type: double) - outputColumnNames: _col0 - Filter Operator - predicate: ((_col0 <= 0.5D) and (_col0 > 0.25D)) (type: boolean) - Select Operator - expressions: 'CA' (type: string), _col0 (type: double) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 20 - ListSink + ListSink PREHOOK: query: explain select * from ( select part1,randum123 @@ -150,25 +167,43 @@ POSTHOOK: Input: default@testa POSTHOOK: Input: default@testa@part1=CA/part2=ABC/part3=300 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a 
root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: testa + filterExpr: ((part1 = 'CA') and (part2 = 'ABC')) (type: boolean) + Statistics: Num rows: 2 Data size: 5106 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rand() (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col0 <= 0.5D) and (_col0 > 0.25D)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'CA' (type: string), _col0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: 20 Processor Tree: - TableScan - alias: testa - filterExpr: ((part1 = 'CA') and (part2 = 'ABC')) (type: boolean) - Select Operator - expressions: rand() (type: double) - outputColumnNames: _col0 - Filter Operator - predicate: ((_col0 <= 0.5D) and (_col0 > 0.25D)) (type: boolean) - Select Operator - expressions: 'CA' (type: string), _col0 (type: double) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 20 - ListSink + ListSink diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out new file mode 100644 index 
0000000000..8aeafc2c8e --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -0,0 +1,1378 @@ +PREHOOK: query: create table if not exists loc_staging_n1 ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_staging_n1 +POSTHOOK: query: create table if not exists loc_staging_n1 ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_staging_n1 +PREHOOK: query: create table loc_orc_n1 like loc_staging_n1 +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_orc_n1 +POSTHOOK: query: create table loc_orc_n1 like loc_staging_n1 +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_orc_n1 +PREHOOK: query: alter table loc_orc_n1 set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc_n1 +PREHOOK: Output: default@loc_orc_n1 +POSTHOOK: query: alter table loc_orc_n1 set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc_n1 +POSTHOOK: Output: default@loc_orc_n1 +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging_n1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@loc_staging_n1 +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging_n1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@loc_staging_n1 +PREHOOK: query: insert overwrite table loc_orc_n1 select * from loc_staging_n1 +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging_n1 +PREHOOK: Output: default@loc_orc_n1 +POSTHOOK: query: insert 
overwrite table loc_orc_n1 select * from loc_staging_n1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging_n1 +POSTHOOK: Output: default@loc_orc_n1 +POSTHOOK: Lineage: loc_orc_n1.locid SIMPLE [(loc_staging_n1)loc_staging_n1.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_n1.state SIMPLE [(loc_staging_n1)loc_staging_n1.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_n1.year SIMPLE [(loc_staging_n1)loc_staging_n1.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_n1.zip SIMPLE [(loc_staging_n1)loc_staging_n1.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: explain select * from loc_orc_n1 +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from loc_orc_n1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: state, locid, zip, year + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: analyze table loc_orc_n1 compute statistics for columns state +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@loc_orc_n1 +PREHOOK: Output: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_n1 compute statistics for columns state +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@loc_orc_n1 +POSTHOOK: Output: default@loc_orc_n1 +#### A masked pattern was here #### +PREHOOK: query: explain select a, c, min(b) +from ( select state as a, locid as b, count(*) as c 
+ from loc_orc_n1 + group by state,locid + ) sq1 +group by a,c +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select a, c, min(b) +from ( select state as a, locid as b, count(*) as c + from loc_orc_n1 + group by state,locid + ) sq1 +group by a,c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: sq1:loc_orc_n1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: state (type: string), locid (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid, $f2 + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(locid) + keys: state (type: string), $f2 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: state, $f2, $f2_0 + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: analyze table loc_orc_n1 compute statistics for columns state,locid,year +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@loc_orc_n1 +PREHOOK: Output: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_n1 compute statistics for columns state,locid,year +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@loc_orc_n1 +POSTHOOK: Output: default@loc_orc_n1 +#### A masked pattern was here #### +PREHOOK: query: explain select year from loc_orc_n1 group by year 
+PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select year from loc_orc_n1 group by year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: year (type: int) + outputColumnNames: year + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: year (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: year + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from 
loc_orc_n1 group by state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: 
explain select state,locid from loc_orc_n1 group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 32 Data size: 5632 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 32 Data size: 5632 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with rollup +PREHOOK: type: 
QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 24 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: 
query: explain select state,locid from loc_orc_n1 group by rollup (state,locid) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by rollup (state,locid) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 24 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state)) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 1408 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 1408 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid)) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 2816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 16 Data size: 2816 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output 
Operator + compressed: false + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid),()) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 24 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: 
mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state,locid),(state),(locid),()) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state,locid),(state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 32 Data size: 5632 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 32 Data size: 5632 Basic 
stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select year from loc_orc_n1 group by year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select year from loc_orc_n1 group by year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: year (type: int) + outputColumnNames: year + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: year (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: 
KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: year + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 2816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 16 Data size: 2816 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: 
vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,zip from loc_orc_n1 group by state,zip +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,zip from loc_orc_n1 group by state,zip +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), zip (type: bigint) + outputColumnNames: state, zip + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: state (type: string), zip (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE 
+ Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: state, zip + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + 
Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: 
+++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by rollup (state,locid) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by rollup (state,locid) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 
(type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state)) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 796 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid)) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + 
mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid),()) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + 
Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state,locid),(state),(locid),()) +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state,locid),(state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state 
(type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select year from loc_orc_n1 group by year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select year from loc_orc_n1 group by year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: year (type: int) + outputColumnNames: year + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: year (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: year + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n1 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data 
size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0L' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: state, locid + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out new file mode 100644 index 0000000000..0035b52311 --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out @@ -0,0 +1,234 @@ +Warning: Map Join MAPJOIN[18][bigTable=?] 
in task 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT cbo_t1.key as k1, cbo_t1.value as v1, + cbo_t2.key as k2, cbo_t2.value as v2 FROM + (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t1 + JOIN + (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t2 + SORT BY k1, v1, k2, v2 +) a +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT cbo_t1.key as k1, cbo_t1.value as v1, + cbo_t2.key as k2, cbo_t2.value as v2 FROM + (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t1 + JOIN + (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t2 + SORT BY k1, v1, k2, v2 +) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + a:cbo_t1:cbo_t3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a:cbo_t1:cbo_t3 + TableScan + alias: a:cbo_t1:cbo_t3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 20 Data size: 3230 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a:cbo_t2:cbo_t3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 20 Data size: 3230 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: 
(UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(key,value,key0,value0) (type: int) + outputColumnNames: $f0 + Statistics: Num rows: 36 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum($f0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[18][bigTable=?] 
in task 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT cbo_t1.key as k1, cbo_t1.value as v1, + cbo_t2.key as k2, cbo_t2.value as v2 FROM + (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t1 + JOIN + (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t2 + SORT BY k1, v1, k2, v2 +) a +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT cbo_t1.key as k1, cbo_t1.value as v1, + cbo_t2.key as k2, cbo_t2.value as v2 FROM + (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t1 + JOIN + (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t2 + SORT BY k1, v1, k2, v2 +) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + a:cbo_t1:cbo_t3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a:cbo_t1:cbo_t3 + TableScan + alias: a:cbo_t1:cbo_t3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 20 Data size: 3230 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a:cbo_t2:cbo_t3 + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 20 Data size: 3230 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: 
(UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(key,value,key0,value0) (type: int) + outputColumnNames: $f0 + Statistics: Num rows: 36 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum($f0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out new file mode 100644 index 0000000000..2e037fdee9 --- /dev/null +++ 
b/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out @@ -0,0 +1,174 @@ +PREHOOK: query: CREATE TABLE dest1_n112(key1 INT, value1 STRING, key2 INT, value2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n112 +POSTHOOK: query: CREATE TABLE dest1_n112(key1 INT, value1 STRING, key2 INT, value2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n112 +PREHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n112 SELECT src1.*, src2.* +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n112 +POSTHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n112 SELECT src1.*, src2.* +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n112 +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src1 + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + 
filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string), UDFToInteger(key0) (type: int), value0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n112 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n112 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1_n112 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
+PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n112 SELECT src1.*, src2.* +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n112 +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n112 SELECT src1.*, src2.* +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n112 +POSTHOOK: Lineage: dest1_n112.key1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n112.key2 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n112.value1 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n112.value2 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1_n112.key1,dest1_n112.value1,dest1_n112.key2,dest1_n112.value2)) FROM dest1_n112 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n112 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1_n112.key1,dest1_n112.value1,dest1_n112.key2,dest1_n112.value2)) FROM dest1_n112 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n112 +#### A masked pattern was here #### +-793937029770 diff --git a/ql/src/test/results/clientpositive/cbo_rp_cross_product_check_2.q.out b/ql/src/test/results/clientpositive/cbo_rp_cross_product_check_2.q.out new file mode 100644 index 0000000000..f445ba3d3d --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_rp_cross_product_check_2.q.out @@ -0,0 +1,765 @@ +PREHOOK: query: create table A_n18 (key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@A_n18 +POSTHOOK: query: create table A_n18 (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@A_n18 +PREHOOK: query: insert into A_n18 
+select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@a_n18 +POSTHOOK: query: insert into A_n18 +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@a_n18 +POSTHOOK: Lineage: a_n18.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: a_n18.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table B_n14 (key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@B_n14 +POSTHOOK: query: create table B_n14 (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@B_n14 +PREHOOK: query: insert into B_n14 +select * from src order by key +limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@b_n14 +POSTHOOK: query: insert into B_n14 +select * from src order by key +limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@b_n14 +POSTHOOK: Lineage: b_n14.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: b_n14.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +Warning: Map Join MAPJOIN[8][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain select * from A_n18 join B_n14 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n18 +PREHOOK: Input: default@b_n14 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from A_n18 join B_n14 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n18 +POSTHOOK: Input: default@b_n14 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + b_n14 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b_n14 + TableScan + alias: b_n14 + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: a_n18 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 5000 Data size: 1770000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5000 Data size: 1770000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + 
Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +PREHOOK: query: explain select * from B_n14 d1 join B_n14 d2 on d1.key = d2.key join A_n18 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n18 +PREHOOK: Input: default@b_n14 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from B_n14 d1 join B_n14 d2 on d1.key = d2.key join A_n18 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n18 +POSTHOOK: Input: default@b_n14 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + a_n18 + Fetch Operator + limit: -1 + d1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a_n18 + TableScan + alias: a_n18 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 + d1 + TableScan + alias: d1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: d2 + filterExpr: key is not 
null (type: boolean) + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 20 Data size: 7040 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: key, value, key0, value0, key1, value1 + Statistics: Num rows: 10000 Data size: 5300000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10000 Data size: 5300000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[25][bigTable=?] 
in task 'Stage-5:MAPRED' is a cross product +PREHOOK: query: explain select * from A_n18 join + (select d1.key + from B_n14 d1 join B_n14 d2 on d1.key = d2.key + where 1 = 1 group by d1.key) od1 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n18 +PREHOOK: Input: default@b_n14 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from A_n18 join + (select d1.key + from B_n14 d1 join B_n14 d2 on d1.key = d2.key + where 1 = 1 group by d1.key) od1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n18 +POSTHOOK: Input: default@b_n14 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-7 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + od1:d1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + od1:d1 + TableScan + alias: od1:d1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: od1:d2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 860 Basic 
stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: key + Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: key + Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + a_n18 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a_n18 + TableScan + alias: a_n18 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + 
condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: key, value, key0 + Statistics: Num rows: 2500 Data size: 660000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2500 Data size: 660000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain select * from A_n18 join (select d1.key from B_n14 d1 join B_n14 d2 where 1 = 1 group by d1.key) od1 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n18 +PREHOOK: Input: default@b_n14 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from A_n18 join (select d1.key from B_n14 d1 join B_n14 d2 where 1 = 1 group by d1.key) od1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n18 +POSTHOOK: Input: default@b_n14 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-7 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + od1:d1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + od1:d1 + TableScan + alias: od1:d1 + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + HashTable 
Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: od1:d2 + Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: key + Statistics: Num rows: 100 Data size: 8600 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 100 Data size: 8600 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: key + Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + a_n18 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a_n18 + TableScan + alias: a_n18 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: 
key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: key, value, key0 + Statistics: Num rows: 2500 Data size: 660000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2500 Data size: 660000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[43][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[36][bigTable=?] 
in task 'Stage-6:MAPRED' is a cross product +Warning: Shuffle Join JOIN[20][tables = [, ]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain select * from +(select A_n18.key from A_n18 group by key) ss join +(select d1.key from B_n14 d1 join B_n14 d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n18 +PREHOOK: Input: default@b_n14 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from +(select A_n18.key from A_n18 group by key) ss join +(select d1.key from B_n14 d1 join B_n14 d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n18 +POSTHOOK: Input: default@b_n14 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-8 depends on stages: Stage-1, Stage-4 , consists of Stage-10, Stage-11, Stage-2 + Stage-10 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-10 + Stage-11 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-11 + Stage-2 + Stage-12 is a root stage + Stage-4 depends on stages: Stage-12 + Stage-0 depends on stages: Stage-6, Stage-7, Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss:a_n18 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: 
vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: key + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Conditional Operator + + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: key, key0 + Statistics: Num rows: 1250 Data size: 216250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1250 Data size: 216250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-11 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: key, key0 + Statistics: Num rows: 1250 Data size: 216250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1250 Data size: 216250 Basic stats: COMPLETE 
Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string) + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: key, key0 + Statistics: Num rows: 1250 Data size: 216250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1250 Data size: 216250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + od1:d1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + od1:d1 + TableScan + alias: od1:d1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 
key (type: string) + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: od1:d2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: key + Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: key + Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff 
--git a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out new file mode 100644 index 0000000000..c813456373 --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out @@ -0,0 +1,340 @@ +PREHOOK: query: CREATE TABLE dest1_n166(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n166 +POSTHOOK: query: CREATE TABLE dest1_n166(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n166 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n166 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) +GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n166 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n166 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) +GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n166 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) + outputColumnNames: $f0, $f1, $f2 + Statistics: Num rows: 500 Data 
size: 131000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT $f1), sum($f1), sum(DISTINCT $f1), count($f2) + keys: $f0 (type: string), $f1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: double), _col5 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: $f0, $f1, $f2, $f3, $f4 + Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: $f0 (type: string), UDFToInteger($f1) (type: int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), UDFToInteger($f4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n166 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 70250 Basic stats: 
COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n166 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1_n166 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n166 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) +GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n166 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n166 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) +GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n166 +POSTHOOK: Lineage: dest1_n166.c1 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1_n166.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n166.c3 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1_n166.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n166.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n166.* FROM dest1_n166 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n166 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n166.* FROM dest1_n166 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n166 +#### A masked pattern 
was here #### +0 1 00.0 0 3 +1 71 116414.0 10044 115 +2 69 225571.0 15780 111 +3 62 332004.0 20119 99 +4 74 452763.0 30965 124 +5 6 5397.0 278 10 +6 5 6398.0 331 6 +7 6 7735.0 447 10 +8 8 8762.0 595 10 +9 7 91047.0 577 12 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n166 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) +GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n166 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n166 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) +GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n166 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) + outputColumnNames: $f0, $f1, $f2 + Statistics: Num rows: 500 Data size: 131000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT $f0), sum($f1), sum(DISTINCT $f1), count($f2) + keys: $f0 (type: string), $f1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort 
order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: double), _col5 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: $f0, $f1, $f2, $f3, $f4 + Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: $f0 (type: string), UDFToInteger($f1) (type: int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), UDFToInteger($f4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n166 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n166 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1_n166 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE 
TABLE dest1_n166 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) +GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n166 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n166 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) +GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n166 +POSTHOOK: Lineage: dest1_n166.c1 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1_n166.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n166.c3 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1_n166.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n166.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n166.* FROM dest1_n166 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n166 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n166.* FROM dest1_n166 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n166 +#### A masked pattern was here #### +0 1 00.0 0 3 +1 1 116414.0 10044 115 +2 1 225571.0 15780 111 +3 1 332004.0 20119 99 +4 1 452763.0 30965 124 +5 1 5397.0 278 10 +6 1 6398.0 331 6 +7 1 7735.0 447 10 +8 1 8762.0 595 10 +9 1 91047.0 577 12 diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out similarity index 51% rename from ql/src/test/results/clientpositive/llap/cbo_rp_groupby3_noskew_multi_distinct.q.out rename to 
ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out index f134aee107..c1e30130b9 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out @@ -42,78 +42,65 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n123 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) - outputColumnNames: $f0, $f00, $f2 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: $f0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: $f2 (type: double), $f00 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: $f0, $f00, $f2 + Statistics: Num rows: 500 
Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: $f0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f2 (type: double), $f00 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1) + mode: complete + outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7 + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: $f0 (type: double), ($f0 / $f1) (type: double), ($f2 / $f3) (type: double), UDFToDouble($f4) (type: double), UDFToDouble($f5) (type: double), power((($f6 - (($f7 * $f7) / $f1)) / $f1), 0.5) (type: double), power((($f6 - (($f7 * $f7) / $f1)) / CASE WHEN (($f1 = 1)) THEN (null) ELSE (($f1 - 1)) END), 0.5) (type: double), (($f6 - (($f7 * $f7) / $f1)) / $f1) (type: double), (($f6 - (($f7 * $f7) / $f1)) / CASE WHEN (($f1 = 1)) THEN (null) ELSE (($f1 - 1)) END) (type: double), $f2 (type: double), UDFToDouble($f3) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n123 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 
(type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1) + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') mode: complete - outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7 - Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: $f0 (type: double), ($f0 / $f1) (type: double), ($f2 / $f3) (type: double), UDFToDouble($f4) (type: double), UDFToDouble($f5) (type: double), power((($f6 - (($f7 * $f7) / $f1)) / $f1), 0.5) (type: double), power((($f6 - (($f7 * $f7) / $f1)) / CASE WHEN (($f1 = 1)) THEN (null) ELSE (($f1 - 1)) END), 0.5) (type: double), (($f6 - (($f7 * $f7) / $f1)) / $f1) (type: double), (($f6 - (($f7 * $f7) / $f1)) / CASE WHEN (($f1 = 1)) THEN (null) ELSE (($f1 - 1)) END) (type: double), $f2 (type: double), UDFToDouble($f3) (type: double) + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n123 - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) - outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: 
struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection Stage: Stage-0 Move Operator @@ -125,7 +112,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n123 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_join0.q.out b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out similarity index 88% rename from ql/src/test/results/clientpositive/llap/cbo_rp_join0.q.out rename to ql/src/test/results/clientpositive/cbo_rp_join0.q.out index ae8bf74193..aca2f517d9 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_join0.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out @@ -24,97 +24,79 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: cbo_t1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_int (type: int) - outputColumnNames: key, 
c_int - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c_int (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: cbo_t2:cbo_t2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_int (type: int) - outputColumnNames: key, c_int - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c_int (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: cbo_t3:cbo_t3 - Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - 
condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: key, c_int, key0, c_int0 - Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_int (type: int), key0 (type: string), c_int0 (type: int) - outputColumnNames: key, c_int, p, q - Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), c_int (type: int) + outputColumnNames: key, c_int + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c_int (type: int) + TableScan + alias: cbo_t2:cbo_t2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + 
Select Operator + expressions: key (type: string), c_int (type: int) + outputColumnNames: key, c_int + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c_int (type: int) + TableScan + alias: cbo_t3:cbo_t3 + Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join 0 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: key, c_int, key0, c_int0 + Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), c_int (type: int), key0 (type: string), c_int0 (type: int) + outputColumnNames: key, c_int, p, q + Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -706,121 +688,99 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez 
-#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: cbo_t1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_int (type: int) - outputColumnNames: key, c_int - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c_int (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: cbo_t2:cbo_t2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_int (type: int) - outputColumnNames: key, c_int - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c_int (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: cbo_t3:cbo_t3 - 
Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: cbo_t4:cbo_t1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_int (type: int) - outputColumnNames: key, c_int - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c_int (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 - Left Outer Join 0 to 3 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - outputColumnNames: key, c_int, key0, c_int0, key1, c_int2 - Statistics: Num rows: 972 Data size: 259257 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_int (type: int), key0 (type: string), c_int0 (type: int), key1 (type: string), c_int2 (type: int) - 
outputColumnNames: key, c_int, p, q, x, b - Statistics: Num rows: 972 Data size: 259257 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 972 Data size: 259257 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), c_int (type: int) + outputColumnNames: key, c_int + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c_int (type: int) + TableScan + alias: cbo_t2:cbo_t2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), c_int (type: int) + outputColumnNames: key, c_int + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 18 Data size: 1513 
Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c_int (type: int) + TableScan + alias: cbo_t3:cbo_t3 + Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: cbo_t4:cbo_t1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), c_int (type: int) + outputColumnNames: key, c_int + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c_int (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join 0 to 2 + Left Outer Join 0 to 3 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + 3 key (type: string) + outputColumnNames: key, c_int, key0, c_int0, key1, c_int2 + Statistics: Num rows: 972 Data size: 259257 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), c_int (type: int), key0 (type: string), c_int0 (type: int), key1 (type: string), c_int2 (type: int) + outputColumnNames: key, c_int, p, q, x, b + Statistics: Num rows: 
972 Data size: 259257 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 972 Data size: 259257 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/cbo_rp_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_join1.q.out new file mode 100644 index 0000000000..d75b040e4f --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_rp_join1.q.out @@ -0,0 +1,460 @@ +PREHOOK: query: CREATE TABLE myinput1_n0(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@myinput1_n0 +POSTHOOK: query: CREATE TABLE myinput1_n0(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@myinput1_n0 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_n0 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@myinput1_n0 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_n0 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@myinput1_n0 +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND b.key = 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND b.key = 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n0 +#### A masked pattern was 
here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), (key = 40) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean) + TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), (key = 40) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {VALUE._col2} + 1 {VALUE._col2} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col3,_col4) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND b.key = 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND b.key = 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +4939870 +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.value = 40 AND a.key = a.value AND b.key = 40 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.value = 40 AND a.key = a.value AND b.key = 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), (key = 40) (type: boolean), (value = 40) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean) + TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), (key = 40) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {VALUE._col2} {VALUE._col3} + 1 {VALUE._col2} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: hash(_col0,_col1,_col4,_col5) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.key = a.value AND b.key = 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on 
a.key = 40 AND a.key = a.value AND b.key = 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +4939870 +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.key = b.key AND b.key = 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.key = b.key AND b.key = 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), (key = 40) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean) + TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), (key = 40) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 
(type: int), _col1 (type: int), _col2 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {VALUE._col2} + 1 {VALUE._col2} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col3,_col4) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT 
sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.key = b.key AND b.key = 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.key = b.key AND b.key = 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +4939870 +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), (key > 40) (type: boolean), (value > 50) (type: boolean), (key = value) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: 
int), _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean) + TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), (key > 40) (type: boolean), (value > 50) (type: boolean), (key = value) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {VALUE._col2} {VALUE._col3} {VALUE._col4} + 1 {VALUE._col2} {VALUE._col3} {VALUE._col4} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col5,_col6) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: 
vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n0 +#### A masked pattern was here #### +4939870 diff --git a/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.out b/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.out new file mode 100644 index 0000000000..5bb0f03875 --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.out @@ -0,0 +1,628 @@ +PREHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: value (type: string) + auto parallelism: false + TableScan + alias: b + filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0D) 
and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 30192 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 30192 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + 
name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: key, 
value, key0, value0 + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string) + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns key,value,key0,value0 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: 
Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, 
value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: value (type: string) + auto parallelism: false + TableScan + alias: b + filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 30192 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 30192 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + 
rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + 
columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string) + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns key,value,key0,value0 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink 
+ +PREHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_simple_select.q.out b/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out similarity index 80% rename from ql/src/test/results/clientpositive/llap/cbo_rp_simple_select.q.out rename to ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out index 35a48a173c..5ca758efe2 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_simple_select.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out @@ -752,22 +752,38 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: c_int is not null (type: 
boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: c_int is not null (type: boolean) + Statistics: Num rows: 18 Data size: 6406 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 18 Data size: 6406 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 18 Data size: 6406 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: c_int is not null (type: boolean) - Filter Operator - predicate: c_int is not null (type: boolean) - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - ListSink + ListSink PREHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) @@ -782,22 +798,38 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = (2 * c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = (2 
* c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = (2 * c_int)) (type: boolean) - Filter Operator - predicate: (c_int = (2 * c_int)) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - ListSink + ListSink PREHOOK: query: -- c_int is 0 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) @@ -812,22 +844,38 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: c_int is not null (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: c_int is not null (type: boolean) + Statistics: Num rows: 18 Data size: 6406 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 
key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 18 Data size: 6406 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 18 Data size: 6406 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: c_int is not null (type: boolean) - Filter Operator - predicate: c_int is not null (type: boolean) - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - ListSink + ListSink PREHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) @@ -862,22 +910,38 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int is not null or (c_int = (2 * c_int))) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int is not null or (c_int = (2 * c_int))) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: 
boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int is not null or (c_int = (2 * c_int))) (type: boolean) - Filter Operator - predicate: (c_int is not null or (c_int = (2 * c_int))) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - ListSink + ListSink PREHOOK: query: -- no rewrite EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) @@ -892,22 +956,38 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int is not null or (c_int = 0)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int is not null or (c_int = 0)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, 
c_int, c_float, c_boolean, dt + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int is not null or (c_int = 0)) (type: boolean) - Filter Operator - predicate: (c_int is not null or (c_int = 0)) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - ListSink + ListSink PREHOOK: query: -- no rewrite diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_udaf_percentile_approx_23.q.out b/ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out similarity index 84% rename from ql/src/test/results/clientpositive/llap/cbo_rp_udaf_percentile_approx_23.q.out rename to ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out index c29a36ab59..893f689d85 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_udaf_percentile_approx_23.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out @@ -277,7 +277,7 @@ POSTHOOK: query: select * from t8_n3 POSTHOOK: type: QUERY POSTHOOK: Input: default@t8_n3 #### A masked pattern was here #### -[25.0,254.08333333333334,476.5612244897959,489.5466666666667] +[23.355555555555558,254.08333333333334,477.0625,488.38271604938274] PREHOOK: query: select * from t9_n2 PREHOOK: type: QUERY PREHOOK: Input: default@t9_n2 @@ -304,7 +304,7 @@ POSTHOOK: query: select * from t11_n3 POSTHOOK: 
type: QUERY POSTHOOK: Input: default@t11_n3 #### A masked pattern was here #### -[25.0,254.08333333333334,476.5612244897959,489.5466666666667] +[23.355555555555558,254.08333333333334,477.0625,488.38271604938274] PREHOOK: query: select * from t12_n1 PREHOOK: type: QUERY PREHOOK: Input: default@t12_n1 @@ -457,7 +457,7 @@ POSTHOOK: query: select * from t8_n3 POSTHOOK: type: QUERY POSTHOOK: Input: default@t8_n3 #### A masked pattern was here #### -[25.0,254.08333333333334,476.5612244897959,489.5466666666667] +[23.355555555555558,254.08333333333334,477.0625,488.38271604938274] PREHOOK: query: select * from t9_n2 PREHOOK: type: QUERY PREHOOK: Input: default@t9_n2 @@ -484,7 +484,7 @@ POSTHOOK: query: select * from t11_n3 POSTHOOK: type: QUERY POSTHOOK: Input: default@t11_n3 #### A masked pattern was here #### -[25.0,254.08333333333334,476.5612244897959,489.5466666666667] +[23.355555555555558,254.08333333333334,477.0625,488.38271604938274] PREHOOK: query: select * from t12_n1 PREHOOK: type: QUERY PREHOOK: Input: default@t12_n1 @@ -510,49 +510,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: bucket_n1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: CASE WHEN ((key < 100.0D)) THEN (NaND) ELSE (key) END (type: double) - outputColumnNames: $f0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: percentile_approx($f0, 0.5) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: array) - 
Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: bucket_n1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((key < 100.0D)) THEN (NaND) ELSE (key) END (type: double) + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: percentile_approx(VALUE._col0) - mode: mergepartial - outputColumnNames: $f0 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + aggregations: percentile_approx($f0, 0.5) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: array) + Reduce Operator Tree: + Group By Operator + aggregations: percentile_approx(VALUE._col0) + mode: mergepartial + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -568,7 +558,7 @@ POSTHOOK: query: select 
percentile_approx(case when key < 100 then cast('NaN' as POSTHOOK: type: QUERY POSTHOOK: Input: default@bucket_n1 #### A masked pattern was here #### -false +true PREHOOK: query: explain select percentile_approx(key, 0.5) from bucket_n1 PREHOOK: type: QUERY @@ -585,49 +575,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: bucket_n1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: double) - outputColumnNames: $f0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: percentile_approx($f0, 0.5) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: array) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: bucket_n1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: double) + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: percentile_approx(VALUE._col0) - mode: mergepartial - outputColumnNames: $f0 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + aggregations: percentile_approx($f0, 0.5) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: array) + Reduce Operator Tree: + Group By Operator + aggregations: percentile_approx(VALUE._col0) + mode: mergepartial + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/cbo_stats_estimation.q.out b/ql/src/test/results/clientpositive/cbo_stats_estimation.q.out new file mode 100644 index 0000000000..389a9bc1cc --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_stats_estimation.q.out @@ -0,0 +1,278 @@ +PREHOOK: query: CREATE TABLE claims(claim_rec_id bigint, claim_invoice_num string, typ_c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@claims +POSTHOOK: query: CREATE TABLE claims(claim_rec_id bigint, claim_invoice_num string, typ_c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@claims +PREHOOK: query: ALTER TABLE claims UPDATE STATISTICS set ('numRows'='1154941534','rawDataSize'='1135307527922') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@claims +PREHOOK: Output: default@claims +POSTHOOK: query: ALTER 
TABLE claims UPDATE STATISTICS set ('numRows'='1154941534','rawDataSize'='1135307527922') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@claims +POSTHOOK: Output: default@claims +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 +PREHOOK: type: QUERY +PREHOOK: Input: default@claims +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@claims +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` +FROM `default`.`claims` +WHERE `typ_c` = 3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: claims + filterExpr: (typ_c = 3) (type: boolean) + Statistics: Num rows: 1154941534 Data size: 1135307527922 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (typ_c = 3) (type: boolean) + Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: claims + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: 
+ bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns claim_rec_id,claim_invoice_num,typ_c + columns.comments + columns.types bigint:string:int +#### A masked pattern was here #### + name default.claims + numFiles 0 + numRows 1154941534 + rawDataSize 1135307527922 + serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns claim_rec_id,claim_invoice_num,typ_c + columns.comments + columns.types bigint:string:int +#### A masked pattern was here #### + name default.claims + numFiles 0 + numRows 1154941534 + rawDataSize 1135307527922 + serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.claims + name: default.claims + Truncated Path -> Alias: + /claims [claims] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 +PREHOOK: type: QUERY +PREHOOK: Input: default@claims +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@claims +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` +FROM `default`.`claims` +WHERE `typ_c` = 3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: claims + filterExpr: (typ_c = 3) (type: boolean) + Statistics: Num rows: 1154941534 Data size: 1135307527922 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (typ_c = 3) (type: boolean) + Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + 
Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: claims + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns claim_rec_id,claim_invoice_num,typ_c + columns.comments + columns.types bigint:string:int +#### A masked pattern was here #### + name default.claims + numFiles 0 + numRows 1154941534 + rawDataSize 1135307527922 + serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns claim_rec_id,claim_invoice_num,typ_c + columns.comments + columns.types bigint:string:int +#### A masked pattern was here #### + name default.claims + numFiles 0 + numRows 1154941534 + rawDataSize 1135307527922 + serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.claims + name: default.claims + Truncated Path -> Alias: + /claims [claims] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/cbo_union_view.q.out b/ql/src/test/results/clientpositive/cbo_union_view.q.out new file mode 100644 index 0000000000..4d2e28ca04 --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_union_view.q.out @@ -0,0 +1,268 @@ +PREHOOK: query: CREATE TABLE src_union_1 (key int, value string) PARTITIONED BY (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_union_1 +POSTHOOK: query: CREATE TABLE src_union_1 (key int, value string) PARTITIONED BY (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_union_1 +PREHOOK: query: CREATE TABLE src_union_2 (key int, value string) PARTITIONED BY (ds string, part_1 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_union_2 +POSTHOOK: query: CREATE TABLE src_union_2 (key int, value string) PARTITIONED BY (ds string, part_1 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_union_2 +PREHOOK: query: CREATE TABLE src_union_3 (key int, value string) PARTITIONED BY (ds 
string, part_1 string, part_2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_union_3 +POSTHOOK: query: CREATE TABLE src_union_3 (key int, value string) PARTITIONED BY (ds string, part_1 string, part_2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_union_3 +PREHOOK: query: CREATE VIEW src_union_view PARTITIONED ON (ds) as +SELECT key, value, ds FROM ( +SELECT key, value, ds FROM src_union_1 +UNION ALL +SELECT key, value, ds FROM src_union_2 +UNION ALL +SELECT key, value, ds FROM src_union_3 +) subq +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@src_union_1 +PREHOOK: Input: default@src_union_2 +PREHOOK: Input: default@src_union_3 +PREHOOK: Output: database:default +PREHOOK: Output: default@src_union_view +POSTHOOK: query: CREATE VIEW src_union_view PARTITIONED ON (ds) as +SELECT key, value, ds FROM ( +SELECT key, value, ds FROM src_union_1 +UNION ALL +SELECT key, value, ds FROM src_union_2 +UNION ALL +SELECT key, value, ds FROM src_union_3 +) subq +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@src_union_1 +POSTHOOK: Input: default@src_union_2 +POSTHOOK: Input: default@src_union_3 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_union_view +POSTHOOK: Lineage: src_union_view.key EXPRESSION [(src_union_1)src_union_1.FieldSchema(name:key, type:int, comment:null), (src_union_2)src_union_2.FieldSchema(name:key, type:int, comment:null), (src_union_3)src_union_3.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: src_union_view.value EXPRESSION [(src_union_1)src_union_1.FieldSchema(name:value, type:string, comment:null), (src_union_2)src_union_2.FieldSchema(name:value, type:string, comment:null), (src_union_3)src_union_3.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_union_1 +PREHOOK: 
Input: default@src_union_2 +PREHOOK: Input: default@src_union_3 +PREHOOK: Input: default@src_union_view +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_union_1 +POSTHOOK: Input: default@src_union_2 +POSTHOOK: Input: default@src_union_3 +POSTHOOK: Input: default@src_union_view +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src_union_1 + filterExpr: (key = 86) (type: boolean) + properties: + insideView TRUE + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string), ds (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 3 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: src_union_2 + filterExpr: (key = 86) (type: boolean) + properties: + insideView TRUE + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = 
86) (type: boolean) + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string), ds (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 3 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: src_union_3 + filterExpr: (key = 86) (type: boolean) + properties: + insideView TRUE + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string), ds (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 3 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='1' +PREHOOK: type: QUERY +PREHOOK: Input: default@src_union_1 +PREHOOK: Input: default@src_union_2 +PREHOOK: Input: default@src_union_3 +PREHOOK: Input: default@src_union_view +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_union_1 +POSTHOOK: Input: default@src_union_2 +POSTHOOK: Input: default@src_union_3 +POSTHOOK: Input: default@src_union_view +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src_union_1 + filterExpr: ((key = 86) and (ds = '1')) (type: boolean) + properties: + insideView TRUE + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((key = 86) and (ds = '1')) (type: boolean) + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 3 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 86 (type: int), _col0 (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 519 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 519 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: src_union_2 + filterExpr: ((key = 86) and (ds = '1')) (type: boolean) + properties: + insideView TRUE + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((key = 86) and (ds = '1')) (type: boolean) + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 3 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 86 (type: int), _col0 (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 519 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 519 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: src_union_3 + filterExpr: ((key = 86) and (ds = '1')) (type: boolean) + properties: + insideView TRUE + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((key = 86) and (ds = '1')) (type: boolean) + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 3 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE 
+ Select Operator + expressions: 86 (type: int), _col0 (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 519 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 519 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/char_serde.q.out b/ql/src/test/results/clientpositive/char_serde.q.out similarity index 86% rename from ql/src/test/results/clientpositive/llap/char_serde.q.out rename to ql/src/test/results/clientpositive/char_serde.q.out index b9bf3cfb37..03213b64b7 100644 --- a/ql/src/test/results/clientpositive/llap/char_serde.q.out +++ b/ql/src/test/results/clientpositive/char_serde.q.out @@ -54,24 +54,24 @@ POSTHOOK: query: load data local inpath '../../data/files/srcbucket0.txt' overwr POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@char_serde_regex -PREHOOK: query: select * from char_serde_regex order by key, value limit 5 +PREHOOK: query: select * from char_serde_regex limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@char_serde_regex #### A masked pattern was here #### -POSTHOOK: query: select * from char_serde_regex order by key, value limit 5 +POSTHOOK: query: select * from char_serde_regex limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_serde_regex #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_1 -0 val_1 -PREHOOK: query: select value, count(*) from char_serde_regex group by value order by value limit 5 +474 val_475 +62 val_63 +468 val_469 +272 val_273 +448 val_449 +PREHOOK: query: select value, count(*) from 
char_serde_regex group by value limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@char_serde_regex #### A masked pattern was here #### -POSTHOOK: query: select value, count(*) from char_serde_regex group by value order by value limit 5 +POSTHOOK: query: select value, count(*) from char_serde_regex group by value limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_serde_regex #### A masked pattern was here #### @@ -114,24 +114,24 @@ POSTHOOK: Input: default@char_serde_regex POSTHOOK: Output: default@char_serde_lb POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:), ] POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:), ] -PREHOOK: query: select * from char_serde_lb order by key, value limit 5 +PREHOOK: query: select * from char_serde_lb limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@char_serde_lb #### A masked pattern was here #### -POSTHOOK: query: select * from char_serde_lb order by key, value limit 5 +POSTHOOK: query: select * from char_serde_lb limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_serde_lb #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_1 -0 val_1 -PREHOOK: query: select value, count(*) from char_serde_lb group by value order by value limit 5 +474 val_475 +62 val_63 +468 val_469 +272 val_273 +448 val_449 +PREHOOK: query: select value, count(*) from char_serde_lb group by value limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@char_serde_lb #### A masked pattern was here #### -POSTHOOK: query: select value, count(*) from char_serde_lb group by value order by value limit 5 +POSTHOOK: query: select value, count(*) from char_serde_lb group by value limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_serde_lb #### A masked pattern was here #### @@ -174,24 +174,24 @@ POSTHOOK: Input: default@char_serde_lb POSTHOOK: Output: default@char_serde_ls 
POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:null), ] POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:null), ] -PREHOOK: query: select * from char_serde_ls order by key, value limit 5 +PREHOOK: query: select * from char_serde_ls limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@char_serde_ls #### A masked pattern was here #### -POSTHOOK: query: select * from char_serde_ls order by key, value limit 5 +POSTHOOK: query: select * from char_serde_ls limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_serde_ls #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_1 -0 val_1 -PREHOOK: query: select value, count(*) from char_serde_ls group by value order by value limit 5 +474 val_475 +62 val_63 +468 val_469 +272 val_273 +448 val_449 +PREHOOK: query: select value, count(*) from char_serde_ls group by value limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@char_serde_ls #### A masked pattern was here #### -POSTHOOK: query: select value, count(*) from char_serde_ls group by value order by value limit 5 +POSTHOOK: query: select value, count(*) from char_serde_ls group by value limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_serde_ls #### A masked pattern was here #### @@ -234,24 +234,24 @@ POSTHOOK: Input: default@char_serde_ls POSTHOOK: Output: default@char_serde_c POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:null), ] POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:null), ] -PREHOOK: query: select * from char_serde_c order by key, value limit 5 +PREHOOK: query: select * from char_serde_c limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@char_serde_c #### A masked pattern was here #### -POSTHOOK: query: select * from char_serde_c order by 
key, value limit 5 +POSTHOOK: query: select * from char_serde_c limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_serde_c #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_1 -0 val_1 -PREHOOK: query: select value, count(*) from char_serde_c group by value order by value limit 5 +474 val_475 +62 val_63 +468 val_469 +272 val_273 +448 val_449 +PREHOOK: query: select value, count(*) from char_serde_c group by value limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@char_serde_c #### A masked pattern was here #### -POSTHOOK: query: select value, count(*) from char_serde_c group by value order by value limit 5 +POSTHOOK: query: select value, count(*) from char_serde_c group by value limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_serde_c #### A masked pattern was here #### @@ -294,24 +294,24 @@ POSTHOOK: Input: default@char_serde_c POSTHOOK: Output: default@char_serde_lbc POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:null), ] POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:null), ] -PREHOOK: query: select * from char_serde_lbc order by key, value limit 5 +PREHOOK: query: select * from char_serde_lbc limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@char_serde_lbc #### A masked pattern was here #### -POSTHOOK: query: select * from char_serde_lbc order by key, value limit 5 +POSTHOOK: query: select * from char_serde_lbc limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_serde_lbc #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_1 -0 val_1 -PREHOOK: query: select value, count(*) from char_serde_lbc group by value order by value limit 5 +474 val_475 +62 val_63 +468 val_469 +272 val_273 +448 val_449 +PREHOOK: query: select value, count(*) from char_serde_lbc group by value limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@char_serde_lbc #### A masked 
pattern was here #### -POSTHOOK: query: select value, count(*) from char_serde_lbc group by value order by value limit 5 +POSTHOOK: query: select value, count(*) from char_serde_lbc group by value limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_serde_lbc #### A masked pattern was here #### @@ -354,24 +354,24 @@ POSTHOOK: Input: default@char_serde_lbc POSTHOOK: Output: default@char_serde_orc POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:null), ] POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:null), ] -PREHOOK: query: select * from char_serde_orc order by key, value limit 5 +PREHOOK: query: select * from char_serde_orc limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@char_serde_orc #### A masked pattern was here #### -POSTHOOK: query: select * from char_serde_orc order by key, value limit 5 +POSTHOOK: query: select * from char_serde_orc limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_serde_orc #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_1 -0 val_1 -PREHOOK: query: select value, count(*) from char_serde_orc group by value order by value limit 5 +474 val_475 +62 val_63 +468 val_469 +272 val_273 +448 val_449 +PREHOOK: query: select value, count(*) from char_serde_orc group by value limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@char_serde_orc #### A masked pattern was here #### -POSTHOOK: query: select value, count(*) from char_serde_orc group by value order by value limit 5 +POSTHOOK: query: select value, count(*) from char_serde_orc group by value limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_serde_orc #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/char_udf1.q.out b/ql/src/test/results/clientpositive/char_udf1.q.out similarity index 99% rename from 
ql/src/test/results/clientpositive/llap/char_udf1.q.out rename to ql/src/test/results/clientpositive/char_udf1.q.out index c376f949f7..a2b4bed275 100644 --- a/ql/src/test/results/clientpositive/llap/char_udf1.q.out +++ b/ql/src/test/results/clientpositive/char_udf1.q.out @@ -40,7 +40,7 @@ from char_udf_1 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_udf_1 #### A masked pattern was here #### -238val_238 238val_238 true +238val_238 238val_238 true PREHOOK: query: select upper(c2), upper(c4), @@ -57,7 +57,7 @@ from char_udf_1 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_udf_1 #### A masked pattern was here #### -VAL_238 VAL_238 true +VAL_238 VAL_238 true PREHOOK: query: select lower(c2), lower(c4), @@ -74,7 +74,7 @@ from char_udf_1 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@char_udf_1 #### A masked pattern was here #### -val_238 val_238 true +val_238 val_238 true PREHOOK: query: select ascii(c2), ascii(c4), diff --git a/ql/src/test/results/clientpositive/clusterctas.q.out b/ql/src/test/results/clientpositive/clusterctas.q.out new file mode 100644 index 0000000000..9d76bc5903 --- /dev/null +++ b/ql/src/test/results/clientpositive/clusterctas.q.out @@ -0,0 +1,142 @@ +PREHOOK: query: EXPLAIN +CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: EXPLAIN +CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-3 + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map 
Reduce + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.x + Write Type: INSERT + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: col1, col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move 
Operator + files: + hdfs directory: true +#### A masked pattern was here #### + Write Type: INSERT + + Stage: Stage-4 + Create Table + columns: key string, value string + name: default.x + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde name: org.apache.hadoop.hive.ql.io.orc.OrcSerde + table properties: + transactional true + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.x + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +POSTHOOK: Lineage: x.key 
SIMPLE [(src)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: x.value SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DROP TABLE x +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@x +PREHOOK: Output: default@x +POSTHOOK: query: DROP TABLE x +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@x +POSTHOOK: Output: default@x diff --git a/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_2.q.out b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out similarity index 77% rename from ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_2.q.out rename to ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out index 31baa2d7b2..ac2e48f54c 100644 --- a/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_2.q.out +++ b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out @@ -212,49 +212,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: calendar - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: month (type: int) - outputColumnNames: month - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(month) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + 
Map Reduce + Map Operator Tree: + TableScan + alias: calendar + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: month (type: int) + outputColumnNames: month + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial + aggregations: max(month) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -354,49 +345,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: calendar - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: month (type: int) - outputColumnNames: month 
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(month) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: calendar + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: month (type: int) + outputColumnNames: month + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial + aggregations: max(month) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/column_pruner_multiple_children.q.out b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out similarity index 59% rename from ql/src/test/results/clientpositive/llap/column_pruner_multiple_children.q.out rename to ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out index bea9396a5a..b72c13ad22 100644 --- a/ql/src/test/results/clientpositive/llap/column_pruner_multiple_children.q.out +++ b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out @@ -30,85 +30,58 @@ POSTHOOK: Input: default@s_n129 POSTHOOK: Output: default@dest1_n52 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s_n129 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution 
mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n52 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection + Map Reduce + Map Operator Tree: + TableScan + alias: s_n129 + Statistics: Num rows: 
1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n52 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -120,7 +93,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n52 - 
Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -128,6 +101,30 @@ STAGE PLANS: Column Types: int, string Table: default.dest1_n52 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM S_n129 INSERT OVERWRITE TABLE DEST1_n52 SELECT key, sum(SUBSTR(value,5)) GROUP BY key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/column_pruning_partitioned_view.q.out b/ql/src/test/results/clientpositive/column_pruning_partitioned_view.q.out similarity index 89% rename from ql/src/test/results/clientpositive/llap/column_pruning_partitioned_view.q.out rename to ql/src/test/results/clientpositive/column_pruning_partitioned_view.q.out index 2ccdff3d1c..e9c0d424f0 100644 --- a/ql/src/test/results/clientpositive/llap/column_pruning_partitioned_view.q.out +++ b/ql/src/test/results/clientpositive/column_pruning_partitioned_view.q.out @@ -39,8 +39,10 @@ STAGE PLANS: alias: lv_table properties: insideView TRUE + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c1 (type: string), c2 (type: string) outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/llap/columnarserde_create_shortcut.q.out b/ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out similarity index 79% rename from ql/src/test/results/clientpositive/llap/columnarserde_create_shortcut.q.out rename to ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out index 4dc1368a84..09ebed0c0a 100644 --- a/ql/src/test/results/clientpositive/llap/columnarserde_create_shortcut.q.out +++ b/ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out @@ -20,53 +20,40 @@ POSTHOOK: Input: default@src_thrift POSTHOOK: Output: default@columnarserde_create_shortcut STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src_thrift - Statistics: Num rows: 11 Data size: 34628 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: lint (type: array), lstring (type: array), mstringstring (type: map), aint (type: int), astring (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 11 Data size: 34628 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: 1 (type: int) - Statistics: Num rows: 11 Data size: 34628 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: array), _col1 (type: array), _col2 (type: map), _col3 (type: int), _col4 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: 
vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: array), VALUE._col1 (type: array), VALUE._col2 (type: map), VALUE._col3 (type: int), VALUE._col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Map Reduce + Map Operator Tree: + TableScan + alias: src_thrift + Statistics: Num rows: 11 Data size: 34628 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lint (type: array), lstring (type: array), mstringstring (type: map), aint (type: int), astring (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 11 Data size: 34628 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: 1 (type: int) Statistics: Num rows: 11 Data size: 34628 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 11 Data size: 34628 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.columnarserde_create_shortcut - - Stage: Stage-2 - Dependency Collection + value expressions: _col0 (type: array), _col1 (type: array), _col2 (type: map), _col3 (type: int), _col4 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: array), VALUE._col1 (type: array), VALUE._col2 (type: map), VALUE._col3 (type: int), VALUE._col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 11 Data size: 34628 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 34628 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.columnarserde_create_shortcut Stage: Stage-0 Move Operator @@ -78,7 +65,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.columnarserde_create_shortcut - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out new file mode 100644 index 0000000000..f12577c3c2 --- /dev/null +++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -0,0 +1,977 @@ +PREHOOK: query: DROP TABLE Employee_Part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE Employee_Part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double) +row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@Employee_Part +POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double) +row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@Employee_Part +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary=2000.0) +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary=2000.0) +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE 
Employee_Part partition(employeeSalary=4000.0) +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary=4000.0) +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 +PREHOOK: query: explain +analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +#### A masked pattern was here #### +POSTHOOK: query: explain +analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part + filterExpr: (employeesalary = 2000.0D) (type: boolean) + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: employeeid (type: int) + outputColumnNames: employeeid + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(employeeid, 'hll') + keys: 2000.0D (type: double) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + 
Reduce Output Operator + key expressions: 2000.0D (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: 2000.0D (type: double) + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: 2000.0D (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: struct), 2000.0D (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: employeeID + Column Types: int + Table: default.employee_part + +PREHOOK: query: explain extended +analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +#### A masked pattern was here #### +POSTHOOK: query: explain extended +analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +#### 
A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part + filterExpr: (employeesalary = 2000.0D) (type: boolean) + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + Statistics Aggregation Key Prefix: default.employee_part/ + GatherStats: true + Select Operator + expressions: employeeid (type: int) + outputColumnNames: employeeid + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(employeeid, 'hll') + keys: 2000.0D (type: double) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: 2000.0D (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: 2000.0D (type: double) + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: employeesalary=2000.0 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + employeesalary 2000.0 + properties: + bucket_count -1 + column.name.delimiter , + columns employeeid,employeename + columns.comments + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.employee_part + numFiles 1 + numRows 0 + partition_columns employeesalary + partition_columns.types double + rawDataSize 0 + serialization.ddl struct employee_part { i32 employeeid, string employeename} + serialization.format | + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 105 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns employeeid,employeename + columns.comments + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.employee_part + partition_columns employeesalary + partition_columns.types double + serialization.ddl struct employee_part { i32 employeeid, string employeename} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.employee_part + name: default.employee_part + Truncated Path -> Alias: + /employee_part/employeesalary=2000.0 [employee_part] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: 2000.0D (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: struct), 2000.0D (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:double + escape.delim \ + 
hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.employee_part/ + Column Stats Desc: + Columns: employeeID + Column Types: int + Table: default.employee_part + Is Table Level Stats: false + +PREHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +#### A masked pattern was here #### +POSTHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +#### A masked pattern was here #### +PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@employee_part +POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@employee_part +# col_name data_type comment +employeeid int +employeename string + +# Partition Information +# col_name data_type comment +employeesalary double + +# Detailed Partition Information +Partition Value: [2000.0] +Database: default +Table: employee_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"employeeid\":\"true\"}} + numFiles 1 + numRows 13 + rawDataSize 92 + totalSize 105 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + field.delim | + serialization.format | +PREHOOK: query: explain +analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +POSTHOOK: query: explain +analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part + filterExpr: (employeesalary = 4000.0D) (type: boolean) + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: employeeid (type: int) + outputColumnNames: employeeid + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(employeeid, 'hll') + keys: 4000.0D (type: double) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: 4000.0D (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: 4000.0D (type: double) + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: 4000.0D (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: struct), 4000.0D (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: employeeID + Column Types: int + Table: default.employee_part + +PREHOOK: query: explain extended +analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +POSTHOOK: query: explain extended +analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: 
default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part + filterExpr: (employeesalary = 4000.0D) (type: boolean) + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + Statistics Aggregation Key Prefix: default.employee_part/ + GatherStats: true + Select Operator + expressions: employeeid (type: int) + outputColumnNames: employeeid + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(employeeid, 'hll') + keys: 4000.0D (type: double) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: 4000.0D (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: 4000.0D (type: double) + Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: employeesalary=4000.0 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + employeesalary 4000.0 + properties: + bucket_count -1 + column.name.delimiter , + columns employeeid,employeename + columns.comments + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.employee_part + numFiles 1 + numRows 0 + partition_columns employeesalary + partition_columns.types double + rawDataSize 0 + serialization.ddl struct employee_part { i32 
employeeid, string employeename} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 105 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns employeeid,employeename + columns.comments + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.employee_part + partition_columns employeesalary + partition_columns.types double + serialization.ddl struct employee_part { i32 employeeid, string employeename} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.employee_part + name: default.employee_part + Truncated Path -> Alias: + /employee_part/employeesalary=4000.0 [employee_part] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: 4000.0D (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: struct), 4000.0D (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 
+ columns.types struct:double + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.employee_part/ + Column Stats Desc: + Columns: employeeID + Column Types: int + Table: default.employee_part + Is Table Level Stats: false + +PREHOOK: query: analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +POSTHOOK: query: analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +PREHOOK: query: explain +analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +#### A masked pattern was here #### +POSTHOOK: query: explain +analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: 
default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part + filterExpr: (employeesalary = 2000.0D) (type: boolean) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: employeeid (type: int), employeename (type: string) + outputColumnNames: employeeid, employeename + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') + keys: 2000.0D (type: double) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: 2000.0D (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: 2000.0D (type: double) + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: 2000.0D (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), 2000.0D (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: PARTIAL + table: + 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: employeeid, employeename + Column Types: int, string + Table: default.employee_part + +PREHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +#### A masked pattern was here #### +POSTHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +#### A masked pattern was here #### +PREHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeID +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@employee_part +POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeID +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@employee_part +col_name employeeID +data_type int +min 16 +max 34 +num_nulls 1 +distinct_count 12 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment from deserializer +PREHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@employee_part +POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@employee_part +col_name employeeName +data_type string 
+min +max +num_nulls 1 +distinct_count 12 +avg_col_len 4.3076923076923075 +max_col_len 6 +num_trues +num_falses +bit_vector HL +comment from deserializer +PREHOOK: query: explain +analyze table Employee_Part compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +POSTHOOK: query: explain +analyze table Employee_Part compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part + Statistics: Num rows: 26 Data size: 2596 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: employeeid (type: int), employeename (type: string), employeesalary (type: double) + outputColumnNames: employeeid, employeename, employeesalary + Statistics: Num rows: 26 Data size: 2596 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') + keys: employeesalary (type: double) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 1744 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key 
expressions: _col0 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 2 Data size: 1744 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 1776 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 1776 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 1776 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: employeeid, employeename + Column Types: int, string + Table: default.employee_part + +PREHOOK: query: analyze table Employee_Part compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +POSTHOOK: query: analyze table Employee_Part compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Input: 
default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) employeeID +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@employee_part +POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) employeeID +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@employee_part +col_name employeeID +data_type int +min 16 +max 34 +num_nulls 1 +distinct_count 12 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment from deserializer +PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@employee_part +POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@employee_part +col_name employeeID +data_type int +min 16 +max 34 +num_nulls 1 +distinct_count 12 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment from deserializer +PREHOOK: query: explain +analyze table Employee_Part compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +POSTHOOK: query: explain +analyze table Employee_Part compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Input: default@employee_part@employeesalary=4000.0 
+POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part + Statistics: Num rows: 26 Data size: 2300 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: employeeid (type: int), employeename (type: string) + outputColumnNames: employeeid, employeename + Statistics: Num rows: 26 Data size: 2300 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: employeeid, employeename + Column Types: int, string + Table: default.employee_part + +PREHOOK: query: analyze table Employee_Part compute statistics for 
columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +POSTHOOK: query: analyze table Employee_Part compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +PREHOOK: query: describe formatted Employee_Part employeeID +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@employee_part +POSTHOOK: query: describe formatted Employee_Part employeeID +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@employee_part +col_name employeeID +data_type int +min 16 +max 34 +num_nulls 2 +distinct_count 12 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} +PREHOOK: query: create database if not exists dummydb +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:dummydb +POSTHOOK: query: create database if not exists dummydb +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:dummydb +PREHOOK: query: use dummydb +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:dummydb +POSTHOOK: query: use dummydb +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:dummydb +PREHOOK: query: analyze table default.Employee_Part partition (employeeSalary=2000.0) compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: 
Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +#### A masked pattern was here #### +POSTHOOK: query: analyze table default.Employee_Part partition (employeeSalary=2000.0) compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +#### A masked pattern was here #### +PREHOOK: query: describe formatted default.Employee_Part partition (employeeSalary=2000.0) employeeID +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@employee_part +POSTHOOK: query: describe formatted default.Employee_Part partition (employeeSalary=2000.0) employeeID +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@employee_part +col_name employeeID +data_type int +min 16 +max 34 +num_nulls 1 +distinct_count 12 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} +PREHOOK: query: analyze table default.Employee_Part compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@employee_part +PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +POSTHOOK: query: analyze table default.Employee_Part compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@employee_part +POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: 
Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 +#### A masked pattern was here #### +PREHOOK: query: use default +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:default +POSTHOOK: query: use default +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:default +PREHOOK: query: drop database dummydb +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:dummydb +PREHOOK: Output: database:dummydb +POSTHOOK: query: drop database dummydb +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:dummydb +POSTHOOK: Output: database:dummydb diff --git a/ql/src/test/results/clientpositive/llap/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out similarity index 75% rename from ql/src/test/results/clientpositive/llap/columnstats_partlvl_dp.q.out rename to ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out index 9ddb93654e..e2946a227e 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_partlvl_dp.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out @@ -84,64 +84,54 @@ POSTHOOK: Output: default@employee_part_n0@employeesalary=4000.0/country=USA #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: employee_part_n0 - filterExpr: (employeesalary = 4000.0D) (type: boolean) - Statistics: Num rows: 1 Data size: 1012 Basic stats: PARTIAL Column stats: PARTIAL - Select Operator - expressions: employeeid (type: int), employeename (type: string), country (type: string) - outputColumnNames: employeeid, employeename, country - Statistics: Num rows: 1 Data size: 1012 
Basic stats: PARTIAL Column stats: PARTIAL - Group By Operator - aggregations: compute_stats(employeename, 'hll'), compute_stats(employeeid, 'hll') - keys: 4000.0D (type: double), country (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1236 Basic stats: PARTIAL Column stats: PARTIAL - Reduce Output Operator - key expressions: 4000.0D (type: double), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: 4000.0D (type: double), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1236 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part_n0 + filterExpr: (employeesalary = 4000.0D) (type: boolean) + Statistics: Num rows: 1 Data size: 1012 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: employeeid (type: int), employeename (type: string), country (type: string) + outputColumnNames: employeeid, employeename, country + Statistics: Num rows: 1 Data size: 1012 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: 4000.0D (type: double), KEY._col1 (type: string) - mode: mergepartial + aggregations: compute_stats(employeename, 'hll'), compute_stats(employeeid, 'hll') + keys: 4000.0D (type: double), country (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1252 Basic stats: PARTIAL Column stats: PARTIAL - Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), 4000.0D (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1252 Basic 
stats: PARTIAL Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1252 Basic stats: PARTIAL Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 1236 Basic stats: PARTIAL Column stats: PARTIAL + Reduce Output Operator + key expressions: 4000.0D (type: double), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: 4000.0D (type: double), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1236 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: 4000.0D (type: double), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1252 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), 4000.0D (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1252 Basic stats: PARTIAL Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: PARTIAL Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Stats Work Basic Stats Work: Column Stats Desc: @@ -242,64 +232,54 @@ POSTHOOK: Output: default@employee_part_n0@employeesalary=2000.0/country=USA #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage - 
Stage-2 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: employee_part_n0 - filterExpr: (employeesalary = 2000.0D) (type: boolean) - Statistics: Num rows: 3 Data size: 2254 Basic stats: PARTIAL Column stats: PARTIAL - Select Operator - expressions: employeeid (type: int), country (type: string) - outputColumnNames: employeeid, country - Statistics: Num rows: 3 Data size: 2254 Basic stats: PARTIAL Column stats: PARTIAL - Group By Operator - aggregations: compute_stats(employeeid, 'hll') - keys: 2000.0D (type: double), country (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 1836 Basic stats: PARTIAL Column stats: PARTIAL - Reduce Output Operator - key expressions: 2000.0D (type: double), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: 2000.0D (type: double), _col1 (type: string) - Statistics: Num rows: 3 Data size: 1836 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part_n0 + filterExpr: (employeesalary = 2000.0D) (type: boolean) + Statistics: Num rows: 3 Data size: 2254 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: employeeid (type: int), country (type: string) + outputColumnNames: employeeid, country + Statistics: Num rows: 3 Data size: 2254 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(VALUE._col0) - keys: 2000.0D (type: double), KEY._col1 (type: string) - mode: mergepartial + aggregations: compute_stats(employeeid, 'hll') + keys: 
2000.0D (type: double), country (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 628 Basic stats: PARTIAL Column stats: PARTIAL - Select Operator - expressions: _col2 (type: struct), 2000.0D (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 628 Basic stats: PARTIAL Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 628 Basic stats: PARTIAL Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 3 Data size: 1836 Basic stats: PARTIAL Column stats: PARTIAL + Reduce Output Operator + key expressions: 2000.0D (type: double), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: 2000.0D (type: double), _col1 (type: string) + Statistics: Num rows: 3 Data size: 1836 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: 2000.0D (type: double), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 628 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: _col2 (type: struct), 2000.0D (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 628 Basic stats: PARTIAL Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 628 Basic stats: PARTIAL Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Stats Work Basic Stats Work: Column Stats Desc: @@ -399,63 +379,53 @@ POSTHOOK: Output: default@employee_part_n0@employeesalary=4000.0/country=USA #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: employee_part_n0 - Statistics: Num rows: 31 Data size: 6072 Basic stats: PARTIAL Column stats: PARTIAL - Select Operator - expressions: employeeid (type: int), employeesalary (type: double), country (type: string) - outputColumnNames: employeeid, employeesalary, country - Statistics: Num rows: 31 Data size: 6072 Basic stats: PARTIAL Column stats: PARTIAL - Group By Operator - aggregations: compute_stats(employeeid, 'hll') - keys: employeesalary (type: double), country (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 31 Data size: 19216 Basic stats: PARTIAL Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string) - Statistics: Num rows: 31 Data size: 19216 Basic stats: PARTIAL Column stats: PARTIAL - value expressions: _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part_n0 + Statistics: Num rows: 31 Data size: 6072 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: employeeid (type: int), employeesalary (type: double), country (type: string) + outputColumnNames: employeeid, 
employeesalary, country + Statistics: Num rows: 31 Data size: 6072 Basic stats: PARTIAL Column stats: PARTIAL Group By Operator - aggregations: compute_stats(VALUE._col0) - keys: KEY._col0 (type: double), KEY._col1 (type: string) - mode: mergepartial + aggregations: compute_stats(employeeid, 'hll') + keys: employeesalary (type: double), country (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 15 Data size: 9536 Basic stats: PARTIAL Column stats: PARTIAL - Select Operator - expressions: _col2 (type: struct), _col0 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 15 Data size: 9536 Basic stats: PARTIAL Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 9536 Basic stats: PARTIAL Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 31 Data size: 19216 Basic stats: PARTIAL Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: string) + Statistics: Num rows: 31 Data size: 19216 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: double), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 9536 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: _col2 (type: struct), _col0 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 9536 Basic stats: 
PARTIAL Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 9536 Basic stats: PARTIAL Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Stats Work Basic Stats Work: Column Stats Desc: @@ -553,63 +523,53 @@ POSTHOOK: Output: default@employee_part_n0@employeesalary=4000.0/country=USA #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: employee_part_n0 - Statistics: Num rows: 54 Data size: 15386 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: employeeid (type: int), employeename (type: string), employeesalary (type: double), country (type: string) - outputColumnNames: employeeid, employeename, employeesalary, country - Statistics: Num rows: 54 Data size: 15386 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') - keys: employeesalary (type: double), country (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 8448 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string) - Statistics: Num rows: 8 Data size: 8448 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 
(type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: employee_part_n0 + Statistics: Num rows: 54 Data size: 15386 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: employeeid (type: int), employeename (type: string), employeesalary (type: double), country (type: string) + outputColumnNames: employeeid, employeename, employeesalary, country + Statistics: Num rows: 54 Data size: 15386 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: double), KEY._col1 (type: string) - mode: mergepartial + aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') + keys: employeesalary (type: double), country (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 8576 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 8576 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 8576 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 8 Data size: 8448 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: string) + Statistics: Num rows: 8 Data size: 8448 Basic 
stats: COMPLETE Column stats: PARTIAL + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: double), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 8576 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 8576 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 8576 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Stats Work Basic Stats Work: Column Stats Desc: @@ -662,7 +622,7 @@ data_type string min max num_nulls 0 -distinct_count 7 +distinct_count 12 avg_col_len 5.142857142857143 max_col_len 6 num_trues @@ -756,7 +716,7 @@ data_type string min max num_nulls 0 -distinct_count 7 +distinct_count 12 avg_col_len 5.142857142857143 max_col_len 6 num_trues @@ -826,7 +786,7 @@ data_type string min max num_nulls 0 -distinct_count 7 +distinct_count 12 avg_col_len 5.142857142857143 max_col_len 6 num_trues diff --git a/ql/src/test/results/clientpositive/columnstats_quoting.q.out b/ql/src/test/results/clientpositive/columnstats_quoting.q.out new file mode 100644 index 0000000000..222c0478ca --- /dev/null +++ b/ql/src/test/results/clientpositive/columnstats_quoting.q.out @@ -0,0 +1,148 @@ +PREHOOK: query: DROP TABLE IF EXISTS user_web_events +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS user_web_events +POSTHOOK: type: DROPTABLE 
+PREHOOK: query: create temporary table user_web_events(`user id` bigint, `user name` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@user_web_events +POSTHOOK: query: create temporary table user_web_events(`user id` bigint, `user name` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@user_web_events +PREHOOK: query: explain analyze table user_web_events compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@user_web_events +PREHOOK: Output: default@user_web_events +#### A masked pattern was here #### +POSTHOOK: query: explain analyze table user_web_events compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@user_web_events +POSTHOOK: Output: default@user_web_events +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: user_web_events + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: user id (type: bigint), user name (type: string) + outputColumnNames: user id, user name + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(user id, 'hll'), compute_stats(user name, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: 
Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: user id, user name + Column Types: bigint, string + Table: default.user_web_events + +PREHOOK: query: analyze table user_web_events compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@user_web_events +PREHOOK: Output: default@user_web_events +#### A masked pattern was here #### +POSTHOOK: query: analyze table user_web_events compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@user_web_events +POSTHOOK: Output: default@user_web_events +#### A masked pattern was here #### +PREHOOK: query: explain analyze table user_web_events compute statistics for columns `user id` +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@user_web_events +PREHOOK: Output: default@user_web_events +#### A masked pattern was here #### +POSTHOOK: query: explain analyze table user_web_events compute statistics for columns `user id` +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@user_web_events +POSTHOOK: Output: default@user_web_events +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: user_web_events + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: user id (type: bigint) + outputColumnNames: user id + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + 
aggregations: compute_stats(user id, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: user id + Column Types: bigint + Table: default.user_web_events + +PREHOOK: query: analyze table user_web_events compute statistics for columns `user id` +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@user_web_events +PREHOOK: Output: default@user_web_events +#### A masked pattern was here #### +POSTHOOK: query: analyze table user_web_events compute statistics for columns `user id` +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@user_web_events +POSTHOOK: Output: default@user_web_events +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out new file mode 100644 index 0000000000..f22d15c2d3 --- /dev/null +++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out @@ -0,0 +1,893 @@ +PREHOOK: query: DROP TABLE IF EXISTS UserVisits_web_text_none +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS 
UserVisits_web_text_none +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE UserVisits_web_text_none ( + sourceIP string, + destURL string, + visitDate string, + adRevenue float, + userAgent string, + cCode string, + lCode string, + sKeyword string, + avgTimeOnSite int) +row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@UserVisits_web_text_none +POSTHOOK: query: CREATE TABLE UserVisits_web_text_none ( + sourceIP string, + destURL string, + visitDate string, + adRevenue float, + userAgent string, + cCode string, + lCode string, + sKeyword string, + avgTimeOnSite int) +row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@UserVisits_web_text_none +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@uservisits_web_text_none +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@uservisits_web_text_none +PREHOOK: query: explain +analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none +#### A masked pattern was here #### +POSTHOOK: query: explain +analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + 
+STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: uservisits_web_text_none + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) + outputColumnNames: sourceip, adrevenue, avgtimeonsite + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: sourceIP, avgTimeOnSite, adRevenue + Column Types: string, int, float + Table: default.uservisits_web_text_none + +PREHOOK: query: explain extended +analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: 
default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none +#### A masked pattern was here #### +POSTHOOK: query: explain extended +analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: uservisits_web_text_none + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics Aggregation Key Prefix: default.uservisits_web_text_none/ + GatherStats: true + Select Operator + expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) + outputColumnNames: sourceip, adrevenue, avgtimeonsite + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: uservisits_web_text_none + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + 
columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite + columns.comments + columns.types string:string:string:float:string:string:string:string:int + field.delim | +#### A masked pattern was here #### + name default.uservisits_web_text_none + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct uservisits_web_text_none { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7060 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite + columns.comments + columns.types string:string:string:float:string:string:string:string:int + field.delim | +#### A masked pattern was here #### + name default.uservisits_web_text_none + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct uservisits_web_text_none { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7060 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.uservisits_web_text_none + name: default.uservisits_web_text_none + Truncated Path -> Alias: + /uservisits_web_text_none [uservisits_web_text_none] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), 
compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.uservisits_web_text_none/ + Column Stats Desc: + Columns: sourceIP, avgTimeOnSite, adRevenue + Column Types: string, int, float + Table: default.uservisits_web_text_none + Is Table Level Stats: true + +PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none +#### A masked pattern was here #### +POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none +#### A masked pattern was here #### +PREHOOK: query: explain +analyze table default.UserVisits_web_text_none compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: 
default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none +#### A masked pattern was here #### +POSTHOOK: query: explain +analyze table default.UserVisits_web_text_none compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: uservisits_web_text_none + Statistics: Num rows: 55 Data size: 65391 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: sourceip (type: string), desturl (type: string), visitdate (type: string), adrevenue (type: float), useragent (type: string), ccode (type: string), lcode (type: string), skeyword (type: string), avgtimeonsite (type: int) + outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite + Statistics: Num rows: 55 Data size: 65391 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 
(type: struct), _col8 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite + Column Types: string, string, string, float, string, string, string, string, int + Table: default.uservisits_web_text_none + +PREHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none +#### A masked pattern was here #### +POSTHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none +#### A masked pattern was here #### +PREHOOK: query: describe formatted UserVisits_web_text_none destURL +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@uservisits_web_text_none +POSTHOOK: query: describe formatted UserVisits_web_text_none destURL +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@uservisits_web_text_none 
+col_name destURL +data_type string +min +max +num_nulls 0 +distinct_count 55 +avg_col_len 48.945454545454545 +max_col_len 96 +num_trues +num_falses +bit_vector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +PREHOOK: query: describe formatted UserVisits_web_text_none adRevenue +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@uservisits_web_text_none +POSTHOOK: query: describe formatted UserVisits_web_text_none adRevenue +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@uservisits_web_text_none +col_name adRevenue +data_type float +min 13.099044799804688 +max 492.98870849609375 +num_nulls 0 +distinct_count 55 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +PREHOOK: query: describe formatted UserVisits_web_text_none avgTimeOnSite +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@uservisits_web_text_none +POSTHOOK: query: describe formatted UserVisits_web_text_none avgTimeOnSite +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@uservisits_web_text_none +col_name avgTimeOnSite +data_type int +min 1 +max 9 +num_nulls 0 +distinct_count 9 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +PREHOOK: query: 
CREATE TABLE empty_tab( + a int, + b double, + c string, + d boolean, + e binary) +row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@empty_tab +POSTHOOK: query: CREATE TABLE empty_tab( + a int, + b double, + c string, + d boolean, + e binary) +row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@empty_tab +PREHOOK: query: explain +analyze table empty_tab compute statistics for columns a,b,c,d,e +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@empty_tab +PREHOOK: Output: default@empty_tab +#### A masked pattern was here #### +POSTHOOK: query: explain +analyze table empty_tab compute statistics for columns a,b,c,d,e +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@empty_tab +POSTHOOK: Output: default@empty_tab +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: empty_tab + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: double), c (type: string), d (type: boolean), e (type: binary) + outputColumnNames: a, b, c, d, e + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: 
struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b, c, d, e + Column Types: int, double, string, boolean, binary + Table: default.empty_tab + +PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@empty_tab +PREHOOK: Output: default@empty_tab +#### A masked pattern was here #### +POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@empty_tab +POSTHOOK: Output: default@empty_tab +#### A masked pattern was here #### +PREHOOK: query: create database if not exists dummydb +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:dummydb +POSTHOOK: query: create database if not exists dummydb +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:dummydb +PREHOOK: query: use dummydb +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:dummydb +POSTHOOK: query: use dummydb +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:dummydb +PREHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns destURL +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: 
default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none +#### A masked pattern was here #### +POSTHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns destURL +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none +#### A masked pattern was here #### +PREHOOK: query: describe formatted default.UserVisits_web_text_none destURL +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@uservisits_web_text_none +POSTHOOK: query: describe formatted default.UserVisits_web_text_none destURL +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@uservisits_web_text_none +col_name destURL +data_type string +min +max +num_nulls 0 +distinct_count 55 +avg_col_len 48.945454545454545 +max_col_len 96 +num_trues +num_falses +bit_vector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +PREHOOK: query: CREATE TABLE UserVisits_in_dummy_db ( + sourceIP string, + destURL string, + visitDate string, + adRevenue float, + userAgent string, + cCode string, + lCode string, + sKeyword string, + avgTimeOnSite int) +row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:dummydb +PREHOOK: Output: dummydb@UserVisits_in_dummy_db +POSTHOOK: query: CREATE TABLE UserVisits_in_dummy_db ( + sourceIP string, + destURL string, + visitDate string, + adRevenue float, + userAgent string, + cCode string, + lCode string, + sKeyword string, + avgTimeOnSite int) +row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:dummydb +POSTHOOK: Output: dummydb@UserVisits_in_dummy_db +PREHOOK: query: LOAD DATA 
LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_in_dummy_db +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: dummydb@uservisits_in_dummy_db +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_in_dummy_db +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: dummydb@uservisits_in_dummy_db +PREHOOK: query: use default +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:default +POSTHOOK: query: use default +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:default +PREHOOK: query: explain +analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: dummydb@uservisits_in_dummy_db +PREHOOK: Output: dummydb@uservisits_in_dummy_db +#### A masked pattern was here #### +POSTHOOK: query: explain +analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: Output: dummydb@uservisits_in_dummy_db +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: uservisits_in_dummy_db + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) + outputColumnNames: sourceip, adrevenue, avgtimeonsite + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1480 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: sourceIP, avgTimeOnSite, adRevenue + Column Types: string, int, float + Table: dummydb.uservisits_in_dummy_db + +PREHOOK: query: explain extended +analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: dummydb@uservisits_in_dummy_db +PREHOOK: Output: dummydb@uservisits_in_dummy_db +#### A masked pattern was here #### +POSTHOOK: query: explain extended +analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: Output: dummydb@uservisits_in_dummy_db +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: uservisits_in_dummy_db + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics 
Aggregation Key Prefix: dummydb.uservisits_in_dummy_db/ + GatherStats: true + Select Operator + expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) + outputColumnNames: sourceip, adrevenue, avgtimeonsite + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: uservisits_in_dummy_db + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite + columns.comments + columns.types string:string:string:float:string:string:string:string:int + field.delim | +#### A masked pattern was here #### + name dummydb.uservisits_in_dummy_db + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct uservisits_in_dummy_db { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7060 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
+ input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite + columns.comments + columns.types string:string:string:float:string:string:string:string:int + field.delim | +#### A masked pattern was here #### + name dummydb.uservisits_in_dummy_db + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct uservisits_in_dummy_db { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7060 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dummydb.uservisits_in_dummy_db + name: dummydb.uservisits_in_dummy_db + Truncated Path -> Alias: + /dummydb.db/uservisits_in_dummy_db [uservisits_in_dummy_db] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf 
true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: dummydb.uservisits_in_dummy_db/ + Column Stats Desc: + Columns: sourceIP, avgTimeOnSite, adRevenue + Column Types: string, int, float + Table: dummydb.uservisits_in_dummy_db + Is Table Level Stats: true + +PREHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: dummydb@uservisits_in_dummy_db +PREHOOK: Output: dummydb@uservisits_in_dummy_db +#### A masked pattern was here #### +POSTHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: Output: dummydb@uservisits_in_dummy_db +#### A masked pattern was here #### +PREHOOK: query: explain +analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: dummydb@uservisits_in_dummy_db +PREHOOK: Output: dummydb@uservisits_in_dummy_db +#### A masked pattern was here #### +POSTHOOK: query: explain +analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: Output: dummydb@uservisits_in_dummy_db +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: uservisits_in_dummy_db + Statistics: Num rows: 55 Data size: 65391 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: sourceip (type: string), desturl (type: string), 
visitdate (type: string), adrevenue (type: float), useragent (type: string), ccode (type: string), lcode (type: string), skeyword (type: string), avgtimeonsite (type: int) + outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite + Statistics: Num rows: 55 Data size: 65391 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite + Column Types: string, string, string, float, string, string, string, string, int + Table: dummydb.uservisits_in_dummy_db + +PREHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: dummydb@uservisits_in_dummy_db +PREHOOK: Output: dummydb@uservisits_in_dummy_db +#### A masked pattern was here #### +POSTHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: Output: dummydb@uservisits_in_dummy_db +#### A masked pattern was here #### +PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db destURL +PREHOOK: type: DESCTABLE +PREHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db destURL +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: dummydb@uservisits_in_dummy_db +col_name destURL +data_type string +min +max +num_nulls 0 +distinct_count 55 +avg_col_len 48.945454545454545 +max_col_len 96 +num_trues +num_falses +bit_vector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue +PREHOOK: type: DESCTABLE +PREHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: dummydb@uservisits_in_dummy_db 
+col_name adRevenue +data_type float +min 13.099044799804688 +max 492.98870849609375 +num_nulls 0 +distinct_count 55 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite +PREHOOK: type: DESCTABLE +PREHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: dummydb@uservisits_in_dummy_db +col_name avgTimeOnSite +data_type int +min 1 +max 9 +num_nulls 0 +distinct_count 9 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +PREHOOK: query: drop table dummydb.UserVisits_in_dummy_db +PREHOOK: type: DROPTABLE +PREHOOK: Input: dummydb@uservisits_in_dummy_db +PREHOOK: Output: dummydb@uservisits_in_dummy_db +POSTHOOK: query: drop table dummydb.UserVisits_in_dummy_db +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: Output: dummydb@uservisits_in_dummy_db +PREHOOK: query: drop database dummydb +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:dummydb +PREHOOK: Output: database:dummydb +POSTHOOK: query: drop database dummydb +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:dummydb +POSTHOOK: Output: database:dummydb diff --git a/ql/src/test/results/clientpositive/llap/combine2.q.out b/ql/src/test/results/clientpositive/combine2.q.out 
similarity index 79% rename from ql/src/test/results/clientpositive/llap/combine2.q.out rename to ql/src/test/results/clientpositive/combine2.q.out index 944d4e2dc4..8e4a92be36 100644 --- a/ql/src/test/results/clientpositive/llap/combine2.q.out +++ b/ql/src/test/results/clientpositive/combine2.q.out @@ -98,9 +98,11 @@ STAGE PLANS: TableScan alias: combine2_n0 filterExpr: value is not null (type: boolean) + Statistics: Num rows: 12 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select key, value from combine2_n0 where value is not null @@ -195,54 +197,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ds (type: string) - outputColumnNames: ds - Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: ds (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map 
Reduce + Map Operator Tree: + TableScan + alias: srcpart + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ds (type: string) + outputColumnNames: ds + Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial + aggregations: count() + keys: ds (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/comments.q.out b/ql/src/test/results/clientpositive/comments.q.out new file mode 100644 index 0000000000..b94e2cea49 --- /dev/null +++ b/ql/src/test/results/clientpositive/comments.q.out @@ -0,0 +1,250 @@ +PREHOOK: query: select key from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 +PREHOOK: query: /* comment comment */ +select key from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: /* comment comment */ +select key from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 +PREHOOK: query: select /*comment*/ key from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select /*comment*/ key from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 +PREHOOK: query: select /*comment*/ key from /* comment */ src /* comment */ limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select /*comment*/ key from /* comment */ src /* comment */ limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 +PREHOOK: query: select /**/ key /* */ from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select /**/ key /* */ from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 +PREHOOK: query: /* + +*/ +select /* +*/ key from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A 
masked pattern was here #### +POSTHOOK: query: /* + +*/ +select /* +*/ key from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 +PREHOOK: query: select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1019 +PREHOOK: query: explain extended select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain extended select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` +FROM (SELECT `key` +FROM `default`.`src` +WHERE `key` > 0) AS `t0` +INNER JOIN (SELECT `key` +FROM `default`.`src` +WHERE `key` > 0) AS `t2` ON `t0`.`key` = `t2`.`key` +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:a + TableScan + alias: a + filterExpr: (UDFToDouble(key) > 0.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (UDFToDouble(key) > 0.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 
Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: b + filterExpr: (UDFToDouble(key) > 0.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (UDFToDouble(key) > 0.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 + Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + 
columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [$hdt$_1:b] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + 
hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/complex_alias.q.out b/ql/src/test/results/clientpositive/complex_alias.q.out new file mode 100644 index 0000000000..e8cdc218ed --- /dev/null +++ b/ql/src/test/results/clientpositive/complex_alias.q.out @@ -0,0 +1,273 @@ +PREHOOK: query: CREATE TABLE agg1 (col0 INT, col1 STRING, col2 DOUBLE) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@agg1 +POSTHOOK: query: CREATE TABLE agg1 (col0 INT, col1 STRING, col2 DOUBLE) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@agg1 +PREHOOK: query: INSERT INTO TABLE agg1 select key,value,key from src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@agg1 +POSTHOOK: query: INSERT INTO TABLE agg1 select key,value,key from src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@agg1 +POSTHOOK: Lineage: agg1.col0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: agg1.col1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: agg1.col2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT single_use_subq11.a1 AS a1, + single_use_subq11.a2 AS a2 +FROM (SELECT Sum(agg1.col2) AS a1 + FROM agg1 + GROUP BY agg1.col0) single_use_subq12 + JOIN (SELECT alias.a2 AS a0, + alias.a1 AS a1, + alias.a1 
AS a2 + FROM (SELECT agg1.col1 AS a0, + '42' AS a1, + agg1.col0 AS a2 + FROM agg1 + UNION ALL + SELECT agg1.col1 AS a0, + '41' AS a1, + agg1.col0 AS a2 + FROM agg1) alias + GROUP BY alias.a2, + alias.a1) single_use_subq11 + ON ( single_use_subq11.a0 = single_use_subq11.a0 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@agg1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT single_use_subq11.a1 AS a1, + single_use_subq11.a2 AS a2 +FROM (SELECT Sum(agg1.col2) AS a1 + FROM agg1 + GROUP BY agg1.col0) single_use_subq12 + JOIN (SELECT alias.a2 AS a0, + alias.a1 AS a1, + alias.a1 AS a2 + FROM (SELECT agg1.col1 AS a0, + '42' AS a1, + agg1.col0 AS a2 + FROM agg1 + UNION ALL + SELECT agg1.col1 AS a0, + '41' AS a1, + agg1.col0 AS a2 + FROM agg1) alias + GROUP BY alias.a2, + alias.a1) single_use_subq11 + ON ( single_use_subq11.a0 = single_use_subq11.a0 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@agg1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: agg1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: col0 (type: int) + outputColumnNames: col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + 
mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: agg1 + filterExpr: col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: COMPLETE + Select Operator + expressions: '42' (type: string), col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: agg1 + filterExpr: col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '41' (type: string), col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), 
KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: SELECT single_use_subq11.a1 AS a1, + single_use_subq11.a2 AS a2 +FROM (SELECT Sum(agg1.col2) AS a1 + FROM agg1 + GROUP BY agg1.col0) single_use_subq12 + JOIN (SELECT alias.a2 AS a0, + alias.a1 AS a1, + alias.a1 AS a2 + FROM (SELECT agg1.col1 AS a0, + '42' AS a1, + agg1.col0 AS a2 + FROM agg1 + UNION ALL + SELECT agg1.col1 AS a0, + '41' AS a1, + agg1.col0 AS a2 + FROM agg1) alias + GROUP BY alias.a2, + alias.a1) single_use_subq11 + ON ( single_use_subq11.a0 = single_use_subq11.a0 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@agg1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT single_use_subq11.a1 AS a1, + single_use_subq11.a2 AS a2 +FROM (SELECT Sum(agg1.col2) AS a1 + FROM agg1 + GROUP BY agg1.col0) single_use_subq12 + JOIN (SELECT alias.a2 AS a0, + alias.a1 AS a1, + alias.a1 AS a2 + FROM (SELECT agg1.col1 AS a0, + '42' AS a1, + agg1.col0 AS a2 + FROM agg1 + UNION ALL + SELECT agg1.col1 AS a0, + '41' AS a1, + agg1.col0 AS a2 + FROM agg1) alias + GROUP BY alias.a2, + alias.a1) single_use_subq11 + ON ( single_use_subq11.a0 = single_use_subq11.a0 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@agg1 +#### A masked pattern was here #### +42 42 +41 
41 diff --git a/ql/src/test/results/clientpositive/llap/compute_stats_date.q.out b/ql/src/test/results/clientpositive/compute_stats_date.q.out similarity index 71% rename from ql/src/test/results/clientpositive/llap/compute_stats_date.q.out rename to ql/src/test/results/clientpositive/compute_stats_date.q.out index ab5cdf0cc8..fe36adb1cd 100644 --- a/ql/src/test/results/clientpositive/llap/compute_stats_date.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_date.q.out @@ -58,55 +58,45 @@ POSTHOOK: Output: default@tab_date #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: tab_date - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: fl_date (type: date) - outputColumnNames: fl_date - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(fl_date, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: tab_date + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: fl_date (type: date) + outputColumnNames: fl_date + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE Group By Operator - 
aggregations: compute_stats(VALUE._col0) - mode: mergepartial + aggregations: compute_stats(fl_date, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/llap/concat_op.q.out b/ql/src/test/results/clientpositive/concat_op.q.out similarity index 78% rename from ql/src/test/results/clientpositive/llap/concat_op.q.out rename to ql/src/test/results/clientpositive/concat_op.q.out index f81215c835..4442658387 100644 --- a/ql/src/test/results/clientpositive/llap/concat_op.q.out +++ b/ql/src/test/results/clientpositive/concat_op.q.out @@ -7,19 +7,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked 
pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: concat(key, value) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src - Select Operator - expressions: concat(key, value) (type: string) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: select concat('a','b','c') PREHOOK: type: QUERY @@ -246,9 +261,11 @@ STAGE PLANS: TableScan alias: _dummy_table Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'abc' (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select 'a' || 'b' || 'c' @@ -270,9 +287,11 @@ STAGE PLANS: TableScan alias: _dummy_table Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'abc' (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select true and false or false and true or true @@ -292,7 +311,7 @@ POSTHOOK: query: explain formatted select key || value 
from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"key\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"value\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"key\",\n \"ndv\": 316\n },\n {\n \"name\": \"value\",\n \"ndv\": 307\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"_o__c0\"\n ],\n \"exprs\": [\n {\n \"op\": {\n \"name\": \"||\",\n \"kind\": \"OTHER_FUNCTION\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n }\n ],\n \"rowCount\": 500.0\n }\n ]\n}","optimizedSQL":"SELECT `key` || `value` AS `_o__c0`\nFROM `default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor 
Tree:":{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"concat(key, value) (type: string)","columnExprMap:":{"_col0":"concat(key, value)"},"outputColumnNames:":["_col0"],"OperatorId:":"SEL_1","children":{"ListSink":{"OperatorId:":"LIST_SINK_3"}}}}}}}}}} +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"key\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"value\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"key\",\n \"ndv\": 316\n },\n {\n \"name\": \"value\",\n \"ndv\": 307\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"_o__c0\"\n ],\n \"exprs\": [\n {\n \"op\": {\n \"name\": \"||\",\n \"kind\": \"OTHER_FUNCTION\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n }\n ],\n \"rowCount\": 500.0\n }\n ]\n}","optimizedSQL":"SELECT `key` || `value` AS `_o__c0`\nFROM 
`default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"concat(key, value) (type: string)","columnExprMap:":{"_col0":"concat(key, value)"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_4"}}}}}}],"Execution mode:":"vectorized"}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_5"}}}}}} PREHOOK: query: explain formatted select key || value || key from src PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -301,7 +320,7 @@ POSTHOOK: query: explain formatted select key || value || key from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"key\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n 
\"precision\": 2147483647,\n \"name\": \"value\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"key\",\n \"ndv\": 316\n },\n {\n \"name\": \"value\",\n \"ndv\": 307\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"_o__c0\"\n ],\n \"exprs\": [\n {\n \"op\": {\n \"name\": \"||\",\n \"kind\": \"OTHER_FUNCTION\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"op\": {\n \"name\": \"||\",\n \"kind\": \"OTHER_FUNCTION\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n }\n ],\n \"rowCount\": 500.0\n }\n ]\n}","optimizedSQL":"SELECT `key` || `value` || `key` AS `_o__c0`\nFROM `default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"concat(concat(key, value), key) (type: string)","columnExprMap:":{"_col0":"concat(concat(key, value), key)"},"outputColumnNames:":["_col0"],"OperatorId:":"SEL_1","children":{"ListSink":{"OperatorId:":"LIST_SINK_3"}}}}}}}}}} +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": 
\"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"key\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"value\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"key\",\n \"ndv\": 316\n },\n {\n \"name\": \"value\",\n \"ndv\": 307\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"_o__c0\"\n ],\n \"exprs\": [\n {\n \"op\": {\n \"name\": \"||\",\n \"kind\": \"OTHER_FUNCTION\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"op\": {\n \"name\": \"||\",\n \"kind\": \"OTHER_FUNCTION\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n }\n ],\n \"rowCount\": 500.0\n }\n ]\n}","optimizedSQL":"SELECT `key` || `value` || `key` AS `_o__c0`\nFROM `default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator 
Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"concat(concat(key, value), key) (type: string)","columnExprMap:":{"_col0":"concat(concat(key, value), key)"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_4"}}}}}}],"Execution mode:":"vectorized"}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_5"}}}}}} PREHOOK: query: explain formatted select key || value || key || value from src PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -310,4 +329,4 @@ POSTHOOK: query: explain formatted select key || value || key || value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"key\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"value\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n 
{\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"key\",\n \"ndv\": 316\n },\n {\n \"name\": \"value\",\n \"ndv\": 307\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"_o__c0\"\n ],\n \"exprs\": [\n {\n \"op\": {\n \"name\": \"||\",\n \"kind\": \"OTHER_FUNCTION\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"op\": {\n \"name\": \"||\",\n \"kind\": \"OTHER_FUNCTION\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"op\": {\n \"name\": \"||\",\n \"kind\": \"OTHER_FUNCTION\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n }\n ],\n \"rowCount\": 500.0\n }\n ]\n}","optimizedSQL":"SELECT `key` || `value` || `key` || `value` AS `_o__c0`\nFROM `default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"concat(concat(concat(key, value), key), value) (type: string)","columnExprMap:":{"_col0":"concat(concat(concat(key, value), key), value)"},"outputColumnNames:":["_col0"],"OperatorId:":"SEL_1","children":{"ListSink":{"OperatorId:":"LIST_SINK_3"}}}}}}}}}} +{"CBOPlan":"{\n 
\"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"key\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"value\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"key\",\n \"ndv\": 316\n },\n {\n \"name\": \"value\",\n \"ndv\": 307\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"_o__c0\"\n ],\n \"exprs\": [\n {\n \"op\": {\n \"name\": \"||\",\n \"kind\": \"OTHER_FUNCTION\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"op\": {\n \"name\": \"||\",\n \"kind\": \"OTHER_FUNCTION\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"op\": {\n \"name\": \"||\",\n \"kind\": \"OTHER_FUNCTION\",\n \"syntax\": \"SPECIAL\"\n },\n \"operands\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n }\n ],\n \"rowCount\": 500.0\n }\n ]\n}","optimizedSQL":"SELECT `key` || `value` || `key` || `value` AS `_o__c0`\nFROM `default`.`src`","cboInfo":"Plan optimized by 
CBO.","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"concat(concat(concat(key, value), key), value) (type: string)","columnExprMap:":{"_col0":"concat(concat(concat(key, value), key), value)"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_4"}}}}}}],"Execution mode:":"vectorized"}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_5"}}}}}} diff --git a/ql/src/test/results/clientpositive/llap/constGby.q.out b/ql/src/test/results/clientpositive/constGby.q.out similarity index 69% rename from ql/src/test/results/clientpositive/llap/constGby.q.out rename to ql/src/test/results/clientpositive/constGby.q.out index 322cdcbf19..dc2ecdced0 100644 --- a/ql/src/test/results/clientpositive/llap/constGby.q.out +++ b/ql/src/test/results/clientpositive/constGby.q.out @@ -38,55 +38,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1_n36 - Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: true (type: boolean) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n36 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: boolean) - mode: mergepartial + aggregations: count() + keys: true (type: boolean) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: boolean) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value 
expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/constantPropWhen.q.out b/ql/src/test/results/clientpositive/constantPropWhen.q.out similarity index 51% rename from ql/src/test/results/clientpositive/llap/constantPropWhen.q.out rename to ql/src/test/results/clientpositive/constantPropWhen.q.out index 6056b082c1..3c42b68e9f 100644 --- a/ql/src/test/results/clientpositive/llap/constantPropWhen.q.out +++ b/ql/src/test/results/clientpositive/constantPropWhen.q.out @@ -29,19 +29,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_1_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1_n4 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (id = id2) is not true (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: test_1_n4 - Select Operator - expressions: (id = id2) is not true (type: boolean) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1_n4 PREHOOK: type: QUERY @@ -64,19 +79,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_1_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1_n4 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (id = id2) is not true (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: test_1_n4 - Select Operator - expressions: (id = id2) is not true (type: boolean) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1_n4 PREHOOK: type: QUERY @@ -99,19 +129,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_1_n4 #### A masked pattern was here 
#### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1_n4 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (id = id2) is true (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: test_1_n4 - Select Operator - expressions: (id = id2) is true (type: boolean) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1_n4 PREHOOK: type: QUERY @@ -134,19 +179,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_1_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1_n4 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (id = id2) is true (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: test_1_n4 - Select Operator - expressions: (id = id2) is true (type: boolean) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1_n4 PREHOOK: type: QUERY @@ -169,19 +229,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_1_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1_n4 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (not COALESCE((id = id2),false)) (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: test_1_n4 - Select Operator - expressions: (not COALESCE((id = id2),false)) (type: boolean) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: SELECT cast(CASE WHEN id = id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1_n4 PREHOOK: type: QUERY @@ -204,19 +279,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_1_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1_n4 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (not COALESCE((id = id2),false)) (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: test_1_n4 - Select Operator - expressions: (not COALESCE((id = id2),false)) (type: boolean) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: SELECT cast(CASE id when id2 THEN FALSE ELSE TRUE END AS BOOLEAN) AS b FROM test_1_n4 PREHOOK: type: QUERY @@ -239,19 +329,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_1_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1_n4 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: COALESCE((id = id2),false) (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: test_1_n4 - Select Operator - expressions: COALESCE((id = id2),false) (type: boolean) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: SELECT cast(CASE WHEN id = id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1_n4 PREHOOK: type: QUERY @@ -274,19 +379,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_1_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1_n4 + Statistics: Num rows: 4 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: COALESCE((id = id2),false) (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: test_1_n4 - Select Operator - expressions: COALESCE((id = id2),false) (type: boolean) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: SELECT cast(CASE id when id2 THEN TRUE ELSE FALSE END AS BOOLEAN) AS b FROM test_1_n4 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out b/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out new file mode 100644 index 0000000000..34502947c3 --- /dev/null +++ 
b/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out @@ -0,0 +1,268 @@ +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain extended + select * from (select a.key as ak, a.value as av, b.key as bk, b.value as bv from src a join src1 b where a.key = '429' ) c +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain extended + select * from (select a.key as ak, a.value as av, b.key as bk, b.value as bv from src a join src1 b where a.key = '429' ) c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT CAST('429' AS STRING) AS `ak`, `t0`.`value` AS `av`, `t1`.`key` AS `bk`, `t1`.`value` AS `bv` +FROM (SELECT `value` +FROM `default`.`src` +WHERE `key` = '429') AS `t0`, +(SELECT `key`, `value` +FROM `default`.`src1`) AS `t1` +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (key = '429') (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '429') (type: boolean) + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col0 (type: string) + auto parallelism: false + TableScan + alias: b + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: 
COMPLETE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src [$hdt$_0:a] + /src1 [$hdt$_1:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + 
Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 13300 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '429' (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 50 Data size: 17650 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 50 Data size: 17650 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: select * from (select a.key as ak, a.value as av, b.key as bk, b.value as bv from src a join src1 b where a.key = '429' ) c +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key as ak, a.value as av, b.key as bk, b.value as bv from src a join src1 b where a.key = '429' ) c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +429 val_429 +429 val_429 +429 val_429 +429 val_429 
+429 val_429 +429 val_429 +429 val_429 +429 val_429 +429 val_429 val_165 +429 val_429 val_165 +429 val_429 val_193 +429 val_429 val_193 +429 val_429 val_265 +429 val_429 val_265 +429 val_429 val_27 +429 val_429 val_27 +429 val_429 val_409 +429 val_429 val_409 +429 val_429 val_484 +429 val_429 val_484 +429 val_429 128 +429 val_429 128 +429 val_429 146 val_146 +429 val_429 146 val_146 +429 val_429 150 val_150 +429 val_429 150 val_150 +429 val_429 213 val_213 +429 val_429 213 val_213 +429 val_429 224 +429 val_429 224 +429 val_429 238 val_238 +429 val_429 238 val_238 +429 val_429 255 val_255 +429 val_429 255 val_255 +429 val_429 273 val_273 +429 val_429 273 val_273 +429 val_429 278 val_278 +429 val_429 278 val_278 +429 val_429 311 val_311 +429 val_429 311 val_311 +429 val_429 369 +429 val_429 369 +429 val_429 401 val_401 +429 val_429 401 val_401 +429 val_429 406 val_406 +429 val_429 406 val_406 +429 val_429 66 val_66 +429 val_429 66 val_66 +429 val_429 98 val_98 +429 val_429 98 val_98 diff --git a/ql/src/test/results/clientpositive/llap/constant_prop.q.out b/ql/src/test/results/clientpositive/constant_prop.q.out similarity index 92% rename from ql/src/test/results/clientpositive/llap/constant_prop.q.out rename to ql/src/test/results/clientpositive/constant_prop.q.out index 1772fa2dd3..43dfac86f1 100644 --- a/ql/src/test/results/clientpositive/llap/constant_prop.q.out +++ b/ql/src/test/results/clientpositive/constant_prop.q.out @@ -35,9 +35,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: named_struct(if(array_contains(array(1,2), 3), 'F1', 'B1'),1,if(array_contains(map_keys(map('b':'x')), 'b'), 'F2', 'B2'),2) (type: struct), named_struct(if(array_contains(array(1,2), 3), 'F1', 'B1'),1,if(array_contains(map_keys(map('b':'x')), 'b'), 'F2', 'B2'),2).f2 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 30000 
Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: SELECT NAMED_STRUCT( diff --git a/ql/src/test/results/clientpositive/constant_prop_1.q.out b/ql/src/test/results/clientpositive/constant_prop_1.q.out new file mode 100644 index 0000000000..5b7cd95e7d --- /dev/null +++ b/ql/src/test/results/clientpositive/constant_prop_1.q.out @@ -0,0 +1,595 @@ +PREHOOK: query: explain +select 1 as a from src +union all +select 1 as a from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select 1 as a from src +union all +select 1 as a from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[14][tables = [sub, b]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +select a, key, value from +( +select 1 as a from src +union all +select 1 as a from src limit 1 +)sub join src b where value='12345' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select a, key, value from +( +select 1 as a from src +union all +select 1 as a from src limit 1 +)sub join src b where value='12345' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1000 Data size: 8000 Basic stats: 
COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: b + filterExpr: (value = '12345') (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value = '12345') (type: boolean) + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + 
Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), _col1 (type: string), '12345' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select 1 as a from src +union all +select 2 as a from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select 1 as a from src +union all +select 2 as a from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[14][tables = [sub, b]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +select a, key, value from +( +select 1 as a from src +union all +select 2 as a from src limit 1 +)sub join src b where value='12345' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select a, key, value from +( +select 1 as a from src +union all +select 2 as a from src limit 1 +)sub join src b where value='12345' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + 
TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int) + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map 
Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + TableScan + alias: b + filterExpr: (value = '12345') (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value = '12345') (type: boolean) + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12345' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[4][tables = [a, b]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select a.key, b.value from src a join src b where a.key = '238' and b.value = '234' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select a.key, b.value from src a join src b where a.key = '238' and b.value = '234' +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (key = '238') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: b + filterExpr: (value = '234') (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value = '234') (type: boolean) + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '238' (type: string), '234' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, b.value from src a join src b on a.key=b.key where b.value = 
'234' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select a.key, b.value from src a join src b on a.key=b.key where b.value = '234' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: b + filterExpr: (key is not null and (value = '234')) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and (value = '234')) (type: boolean) + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), '234' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 522 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 522 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table t_n26 ( +a int, +b int, +c int, +d int, +e int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_n26 +POSTHOOK: query: create table t_n26 ( +a int, +b int, +c int, +d int, +e int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_n26 +PREHOOK: query: explain +select a2 as a3 from +(select a1 as a2, c1 as c2 from +(select a as a1, b as b1, c as c1 from t_n26 where a=1 and b=2 and c=3)sub1)sub2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_n26 +#### A masked pattern was here #### +POSTHOOK: query: explain +select a2 as a3 from +(select a1 as a2, c1 as c2 from +(select a as a1, b as b1, c as c1 from t_n26 where a=1 and b=2 and c=3)sub1)sub2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_n26 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t_n26 + filterExpr: ((a = 1) and (b = 2) and (c = 3)) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((a = 1) and (b = 2) and (c = 3)) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/constant_prop_2.q.out b/ql/src/test/results/clientpositive/constant_prop_2.q.out new file mode 100644 index 0000000000..5b30263bf7 --- /dev/null +++ b/ql/src/test/results/clientpositive/constant_prop_2.q.out @@ -0,0 +1,100 @@ +PREHOOK: query: explain select count('1') from src group by '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select count('1') from src group by '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count('1') from src group by '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select count('1') from src group by '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +500 +PREHOOK: query: explain +analyze table srcpart partition (ds='2008-04-08',hr=11) compute statistics for columns key, value +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart +PREHOOK: Output: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: explain +analyze table srcpart partition (ds='2008-04-08',hr=11) compute statistics for columns key, value +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart +POSTHOOK: Output: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + filterExpr: ((ds = '2008-04-08') and (hr = '11')) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-04-08' (type: string), '11' (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '11' (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '11' (type: string) + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-04-08' (type: string), '11' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.srcpart + diff --git a/ql/src/test/results/clientpositive/constant_prop_3.q.out b/ql/src/test/results/clientpositive/constant_prop_3.q.out new file mode 100644 index 0000000000..6c47f613d1 --- /dev/null +++ b/ql/src/test/results/clientpositive/constant_prop_3.q.out @@ -0,0 +1,454 @@ +PREHOOK: query: drop table part_hive +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table part_hive +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table partsupp_hive +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table partsupp_hive +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table supplier_hive +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table supplier_hive +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table part_hive (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, +P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_hive +POSTHOOK: query: create table part_hive (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, +P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_hive +PREHOOK: query: create table partsupp_hive (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, +PS_COMMENT STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partsupp_hive +POSTHOOK: query: create table 
partsupp_hive (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, +PS_COMMENT STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partsupp_hive +PREHOOK: query: create table supplier_hive (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, +S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@supplier_hive +POSTHOOK: query: create table supplier_hive (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, +S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@supplier_hive +PREHOOK: query: analyze table part_hive compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@part_hive +PREHOOK: Output: default@part_hive +POSTHOOK: query: analyze table part_hive compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_hive +POSTHOOK: Output: default@part_hive +PREHOOK: query: analyze table part_hive compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@part_hive +PREHOOK: Output: default@part_hive +#### A masked pattern was here #### +POSTHOOK: query: analyze table part_hive compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@part_hive +POSTHOOK: Output: default@part_hive +#### A masked pattern was here #### +PREHOOK: query: analyze table partsupp_hive compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@partsupp_hive +PREHOOK: Output: default@partsupp_hive +POSTHOOK: query: analyze table partsupp_hive compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partsupp_hive +POSTHOOK: Output: default@partsupp_hive +PREHOOK: query: analyze table partsupp_hive compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@partsupp_hive +PREHOOK: Output: 
default@partsupp_hive +#### A masked pattern was here #### +POSTHOOK: query: analyze table partsupp_hive compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@partsupp_hive +POSTHOOK: Output: default@partsupp_hive +#### A masked pattern was here #### +PREHOOK: query: analyze table supplier_hive compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@supplier_hive +PREHOOK: Output: default@supplier_hive +POSTHOOK: query: analyze table supplier_hive compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@supplier_hive +POSTHOOK: Output: default@supplier_hive +PREHOOK: query: analyze table supplier_hive compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@supplier_hive +PREHOOK: Output: default@supplier_hive +#### A masked pattern was here #### +POSTHOOK: query: analyze table supplier_hive compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@supplier_hive +POSTHOOK: Output: default@supplier_hive +#### A masked pattern was here #### +Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp_hive, + part_hive +where + p_partkey = ps_partkey + and p_brand <> 'Brand#34' + and p_type not like 'ECONOMY BRUSHED%' + and p_size in (22, 14, 27, 49, 21, 33, 35, 28) + and partsupp_hive.ps_suppkey not in ( + select + s_suppkey + from + supplier_hive + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part_hive +PREHOOK: Input: default@partsupp_hive +PREHOOK: Input: default@supplier_hive +#### A masked pattern was here #### +POSTHOOK: query: explain select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + 
partsupp_hive, + part_hive +where + p_partkey = ps_partkey + and p_brand <> 'Brand#34' + and p_type not like 'ECONOMY BRUSHED%' + and p_size in (22, 14, 27, 49, 21, 33, 35, 28) + and partsupp_hive.ps_suppkey not in ( + select + s_suppkey + from + supplier_hive + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_hive +POSTHOOK: Input: default@partsupp_hive +POSTHOOK: Input: default@supplier_hive +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-1 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: supplier_hive + filterExpr: (s_comment like '%Customer%Complaints%') (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (s_comment like '%Customer%Complaints%') (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_suppkey (type: int) + outputColumnNames: s_suppkey + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(s_suppkey) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator 
+ aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part_hive + filterExpr: ((p_size) IN (22, 14, 27, 49, 21, 33, 35, 28) and (p_brand <> 'Brand#34') and p_partkey is not null and (not (p_type like 'ECONOMY BRUSHED%'))) (type: boolean) + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((p_size) IN (22, 14, 27, 49, 21, 33, 35, 28) and (p_brand <> 'Brand#34') and p_partkey is not null and (not (p_type like 'ECONOMY BRUSHED%'))) (type: boolean) + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: int) + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 192 Basic 
stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col3 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7, _col9 + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col7 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col9 (type: boolean) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9 + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col6 = 0L) or (_col9 is null and (_col7 >= _col6) and _col1 is not null)) (type: boolean) + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int) + outputColumnNames: 
_col1, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col1 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int) + null sort order: zzzz + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col3:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int) + null sort order: zzzz + sort order: -+++ + Statistics: Num rows: 1 Data 
size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: supplier_hive + filterExpr: ((s_comment like '%Customer%Complaints%') and s_suppkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((s_comment like '%Customer%Complaints%') and s_suppkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_suppkey (type: int) + outputColumnNames: s_suppkey + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: s_suppkey (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num 
rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: partsupp_hive + filterExpr: ps_partkey is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ps_partkey is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ps_partkey (type: int), ps_suppkey (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/constant_prop_timestamp_date_cast.q.out b/ql/src/test/results/clientpositive/constant_prop_timestamp_date_cast.q.out new file mode 100644 index 0000000000..cf04d306a3 --- /dev/null +++ b/ql/src/test/results/clientpositive/constant_prop_timestamp_date_cast.q.out @@ -0,0 +1,74 @@ +PREHOOK: query: CREATE TABLE constant_prop(start_dt timestamp, stop_dt timestamp) STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@constant_prop +POSTHOOK: query: CREATE TABLE constant_prop(start_dt timestamp, stop_dt timestamp) STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@constant_prop +PREHOOK: query: explain UPDATE constant_prop SET stop_dt = CURRENT_TIMESTAMP WHERE CAST(start_dt AS DATE) = CURRENT_DATE +PREHOOK: type: QUERY +PREHOOK: Input: default@constant_prop +PREHOOK: Output: default@constant_prop +POSTHOOK: query: explain UPDATE constant_prop SET stop_dt = CURRENT_TIMESTAMP WHERE CAST(start_dt AS DATE) = CURRENT_DATE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@constant_prop +POSTHOOK: Output: default@constant_prop +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: constant_prop + filterExpr: (CAST( start_dt AS DATE) = DATE'2020-03-05') (type: boolean) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (CAST( start_dt AS DATE) = DATE'2020-03-05') (type: boolean) 
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID (type: struct), start_dt (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: timestamp), TIMESTAMP'2020-03-05 14:16:57' (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.constant_prop + Write Type: UPDATE + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.constant_prop + Write Type: UPDATE + + Stage: Stage-2 + Stats Work + Basic Stats Work: + diff --git a/ql/src/test/results/clientpositive/llap/constantfolding.q.out b/ql/src/test/results/clientpositive/constantfolding.q.out similarity index 95% rename from ql/src/test/results/clientpositive/llap/constantfolding.q.out rename to ql/src/test/results/clientpositive/constantfolding.q.out index 44de853a82..3a44344ceb 100644 --- 
a/ql/src/test/results/clientpositive/llap/constantfolding.q.out +++ b/ql/src/test/results/clientpositive/constantfolding.q.out @@ -194,9 +194,11 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: TIMESTAMP'1970-12-31 15:59:58.174' (type: timestamp) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 20000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select cast("1970-12-31 15:59:58.174" as DATE) from src @@ -217,9 +219,11 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: DATE'1970-12-31' (type: date) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: CREATE TABLE dest1_n127(c1 STRING) STORED AS TEXTFILE @@ -273,9 +277,11 @@ STAGE PLANS: Processor Tree: TableScan alias: dest1_n127 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 1.098612288668D (type: double), null (type: double), null (type: double), 1.098612288668D (type: double), null (type: double), null (type: double), 1.584962500721D (type: double), null (type: double), null (type: double), 0.47712125472D (type: double), null (type: double), null (type: double), 1.584962500721D (type: double), null (type: double), null (type: double), null (type: double), -1.0D (type: double), 7.389056098931D (type: double), 8.0D (type: double), 8.0D (type: double), 0.125D (type: double), 8.0D (type: double), 2.0D (type: double), NaND (type: double), 1.0D (type: double), 1.0D (type: double), 8.0D (type: double), 8.0D (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, 
_col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: SELECT ROUND(LN(3.0),12), LN(0.0), LN(-1), ROUND(LOG(3.0),12), LOG(0.0), diff --git a/ql/src/test/results/clientpositive/llap/constprog1.q.out b/ql/src/test/results/clientpositive/constprog1.q.out similarity index 85% rename from ql/src/test/results/clientpositive/llap/constprog1.q.out rename to ql/src/test/results/clientpositive/constprog1.q.out index bd0d6688d6..937b9ecb77 100644 --- a/ql/src/test/results/clientpositive/llap/constprog1.q.out +++ b/ql/src/test/results/clientpositive/constprog1.q.out @@ -21,9 +21,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'F1' (type: string) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: SELECT IF(INSTR(CONCAT('foo', 'bar'), 'foob') > 0, "F1", "B1") diff --git a/ql/src/test/results/clientpositive/constprog2.q.out b/ql/src/test/results/clientpositive/constprog2.q.out new file mode 100644 index 0000000000..b1d99e404d --- /dev/null +++ b/ql/src/test/results/clientpositive/constprog2.q.out @@ -0,0 +1,154 @@ +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT src1.key, src1.key + 1, src2.value + FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND src1.key = 86 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT src1.key, src1.key + 1, src2.value + FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND src1.key = 86 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a 
root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (key = 86) (type: boolean) + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: (key = 86) (type: boolean) + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 86 (type: int), 87 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT src1.key, src1.key + 1, src2.value + FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND cast(src1.key as double) = 86 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT src1.key, src1.key + 1, src2.value + FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND cast(src1.key as double) = 86 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (UDFToDouble(key) = 86.0D) (type: boolean) + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) = 86.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: (key = 86) (type: boolean) + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 182 Basic 
stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 86 (type: int), 87 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/constprog3.q.out b/ql/src/test/results/clientpositive/constprog3.q.out new file mode 100644 index 0000000000..b4d2e05a05 --- /dev/null +++ b/ql/src/test/results/clientpositive/constprog3.q.out @@ -0,0 +1,92 @@ +PREHOOK: query: create temporary table table1(id int, val int, val1 int, dimid int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: create temporary table table1(id int, val int, val1 int, dimid int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: create temporary table table3(id int, val int, val1 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table3 +POSTHOOK: query: create temporary table table3(id int, val int, val1 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@table3 +Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select table1.id, table1.val, table1.val1 +from table1 inner join table3 +on table1.dimid = table3.id and table3.id = 1 where table1.dimid <> 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: explain +select table1.id, table1.val, table1.val1 +from table1 inner join table3 +on table1.dimid = table3.id and table3.id = 1 where table1.dimid <> 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: table1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), val (type: int), val1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + TableScan + alias: table3 + filterExpr: (id = 1) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id = 1) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data 
size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/constprog_dp.q.out b/ql/src/test/results/clientpositive/constprog_dp.q.out new file mode 100644 index 0000000000..e3f74c2f06 --- /dev/null +++ b/ql/src/test/results/clientpositive/constprog_dp.q.out @@ -0,0 +1,165 @@ +PREHOOK: query: create table dest_n1(key string, value string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_n1 +POSTHOOK: query: create table dest_n1(key string, value string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_n1 +PREHOOK: query: EXPLAIN +from srcpart +insert overwrite table dest_n1 partition (ds) select key, value, ds where ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest_n1 +POSTHOOK: query: EXPLAIN +from srcpart +insert overwrite table dest_n1 partition (ds) select key, value, ds where ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on 
stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + filterExpr: (ds = '2008-04-08') (type: boolean) + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 272000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 272000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_n1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 1000 Data size: 272000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 
(type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_n1 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest_n1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_n1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_n1 + + 
Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: from srcpart +insert overwrite table dest_n1 partition (ds) select key, value, ds where ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest_n1 +POSTHOOK: query: from srcpart +insert overwrite table dest_n1 partition (ds) select key, value, ds where ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest_n1@ds=2008-04-08 +POSTHOOK: Lineage: dest_n1 PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_n1 PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] diff --git a/ql/src/test/results/clientpositive/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/constprog_partitioner.q.out new file mode 100644 index 0000000000..13934fb4a6 --- /dev/null +++ b/ql/src/test/results/clientpositive/constprog_partitioner.q.out @@ -0,0 +1,169 @@ +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT src1.key, src1.key + 1, src2.value + FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND src1.key = 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT src1.key, src1.key + 1, src2.value + FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND src1.key = 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: 
Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (key = 100) (type: boolean) + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = 100) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: (key = 100) (type: boolean) + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = 100) (type: boolean) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 100 (type: int), 101 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT l_partkey, l_suppkey +FROM lineitem li +WHERE li.l_linenumber = 1 AND + li.l_orderkey IN (SELECT l_orderkey FROM lineitem WHERE l_shipmode = 'AIR' AND l_linenumber = li.l_linenumber) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT l_partkey, l_suppkey +FROM lineitem li +WHERE li.l_linenumber = 1 AND + li.l_orderkey IN (SELECT l_orderkey FROM lineitem WHERE l_shipmode = 'AIR' AND l_linenumber = li.l_linenumber) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: li + filterExpr: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean) + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), 1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), 1 (type: int) + Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int) + TableScan + alias: lineitem + filterExpr: ((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + Statistics: Num rows: 100 Data 
size: 9600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l_orderkey (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int), 1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/constprog_type.q.out b/ql/src/test/results/clientpositive/constprog_type.q.out similarity index 56% rename 
from ql/src/test/results/clientpositive/llap/constprog_type.q.out rename to ql/src/test/results/clientpositive/constprog_type.q.out index 59439a4133..66711dfbae 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_type.q.out +++ b/ql/src/test/results/clientpositive/constprog_type.q.out @@ -22,71 +22,71 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n26 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Row Limit Per Split: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: DATE'2013-11-17' (type: date), TIMESTAMP'2011-04-30 03:46:56.4485' (type: timestamp) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 48000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 48000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n26 - Select Operator - expressions: DATE'2013-11-17' (type: date), TIMESTAMP'2011-04-30 03:46:56.4485' (type: timestamp) - outputColumnNames: d, t - Statistics: Num rows: 500 Data size: 48000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: 
compute_stats(d, 'hll'), compute_stats(t, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Reduce + Map Operator Tree: + TableScan + alias: src + Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: DATE'2013-11-17' (type: date), TIMESTAMP'2011-04-30 03:46:56.4485' (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 48000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 48000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n26 + Select Operator + expressions: DATE'2013-11-17' (type: date), TIMESTAMP'2011-04-30 03:46:56.4485' (type: timestamp) + 
outputColumnNames: d, t + Statistics: Num rows: 500 Data size: 48000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(d, 'hll'), compute_stats(t, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### Stage: Stage-0 Move Operator @@ -98,7 +98,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n26 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -106,6 +106,36 @@ STAGE PLANS: Column Types: date, timestamp Table: default.dest1_n26 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n26 
+ + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n26 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + PREHOOK: query: INSERT OVERWRITE TABLE dest1_n26 SELECT cast('2013-11-17' as date), cast(cast('1.3041352164485E9' as double) as timestamp) FROM src tablesample (1 rows) diff --git a/ql/src/test/results/clientpositive/llap/constprog_when_case.q.out b/ql/src/test/results/clientpositive/constprog_when_case.q.out similarity index 85% rename from ql/src/test/results/clientpositive/llap/constprog_when_case.q.out rename to ql/src/test/results/clientpositive/constprog_when_case.q.out index b0abe6720f..8d3dd98e6d 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_when_case.q.out +++ b/ql/src/test/results/clientpositive/constprog_when_case.q.out @@ -44,27 +44,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src_orc - Statistics: Num rows: 1000 Data size: 269000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: if((bool0 is true or (null and bool0 is not true and bool0 is not false)), key0, if((((not bool0) is true and bool0 is not true) or (null and bool0 is not true and bool0 is not false)), key1, key2)) (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Reduce + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 1000 Data size: 269000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if((bool0 is true or (null and bool0 is not true and bool0 is not false)), key0, if((((not bool0) is true and bool0 is not true) or (null and bool0 is not true and bool0 is not false)), key1, key2)) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/correlated_join_keys.q.out b/ql/src/test/results/clientpositive/correlated_join_keys.q.out new file mode 100644 index 0000000000..3fccaff67b --- /dev/null +++ b/ql/src/test/results/clientpositive/correlated_join_keys.q.out @@ -0,0 +1,282 @@ +PREHOOK: query: drop table customer_address_n0 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table customer_address_n0 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table customer_address_n0 +( + ca_address_sk int, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset float, + ca_location_type string +) +row format delimited fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default 
+PREHOOK: Output: default@customer_address_n0 +POSTHOOK: query: create table customer_address_n0 +( + ca_address_sk int, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset float, + ca_location_type string +) +row format delimited fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer_address_n0 +PREHOOK: query: load data local inpath '../../data/files/customer_address.txt' overwrite into table customer_address_n0 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@customer_address_n0 +POSTHOOK: query: load data local inpath '../../data/files/customer_address.txt' overwrite into table customer_address_n0 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@customer_address_n0 +PREHOOK: query: analyze table customer_address_n0 compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address_n0 +PREHOOK: Output: default@customer_address_n0 +POSTHOOK: query: analyze table customer_address_n0 compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address_n0 +POSTHOOK: Output: default@customer_address_n0 +PREHOOK: query: analyze table customer_address_n0 compute statistics for columns ca_state, ca_zip +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@customer_address_n0 +PREHOOK: Output: default@customer_address_n0 +#### A masked pattern was here #### +POSTHOOK: query: analyze table customer_address_n0 compute statistics for columns ca_state, ca_zip +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@customer_address_n0 +POSTHOOK: Output: default@customer_address_n0 +#### A masked pattern was here #### +PREHOOK: query: explain select count(*) from customer_address_n0 a join customer_address_n0 b on 
(a.ca_zip = b.ca_zip and a.ca_state = b.ca_state) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select count(*) from customer_address_n0 a join customer_address_n0 b on (a.ca_zip = b.ca_zip and a.ca_state = b.ca_state) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (ca_zip is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (ca_zip is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ca_state (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: b + filterExpr: (ca_zip is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (ca_zip is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ca_state (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 3500 Basic stats: 
COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(*) from 
customer_address_n0 a join customer_address_n0 b on (a.ca_zip = b.ca_zip and a.ca_state = b.ca_state) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain select count(*) from customer_address_n0 a join customer_address_n0 b on (a.ca_zip = b.ca_zip and a.ca_state = b.ca_state) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: (ca_zip is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (ca_zip is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ca_state (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: b + filterExpr: (ca_zip is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (ca_zip is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ca_state (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
+PREHOOK: query: drop table customer_address_n0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@customer_address_n0 +PREHOOK: Output: default@customer_address_n0 +POSTHOOK: query: drop table customer_address_n0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@customer_address_n0 +POSTHOOK: Output: default@customer_address_n0 diff --git a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out new file mode 100644 index 0000000000..3da383f2b0 --- /dev/null +++ b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out @@ -0,0 +1,1099 @@ +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +LEFT SEMI JOIN src yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +LEFT SEMI JOIN src yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + value 
expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + alias: yy + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) 
+ outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.cnt +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +LEFT SEMI JOIN src yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.cnt +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +LEFT SEMI JOIN src yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 +XifREjIWNTdZii76gCxhIQ== +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +LEFT SEMI JOIN src yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +LEFT SEMI JOIN src yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 
+ +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: yy + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By 
Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 300 Data size: 26050 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 330 Data size: 28655 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 330 Data size: 28655 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 165 Data size: 14327 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 465 Data size: 40377 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 511 Data size: 44414 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 511 Data size: 44414 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Mux Operator + Statistics: Num rows: 465 Data size: 40377 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: 
string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 511 Data size: 44414 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 511 Data size: 44414 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.cnt +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +LEFT SEMI JOIN src yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.cnt +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +LEFT SEMI JOIN src yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 +XifREjIWNTdZii76gCxhIQ== +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value +FROM +src1 xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND + y.key > 20) yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value +FROM +src1 xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND + y.key > 20) yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 
depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: y + filterExpr: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 200.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 200.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: 
COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: xx + filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) + Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 350 
Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.value +FROM +src1 xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND + y.key > 20) yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.value +FROM +src1 xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND + y.key > 20) yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 +146 val_146 +150 val_150 +66 val_66 +98 val_98 +yXzkFzMwxxoH+6e+nKoA8A== +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value +FROM +src1 xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND + y.key > 20) yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value +FROM +src1 xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND + y.key > 20) yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: xx + filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) + Statistics: Num rows: 25 Data size: 
4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) + Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: x + filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: y + filterExpr: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 200.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 200.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + 
outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 112 Data size: 9920 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 235 Data size: 20832 Basic stats: COMPLETE Column stats: PARTIAL + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 258 Data size: 22915 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 258 Data size: 22915 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 123 Data size: 10912 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 235 Data size: 20832 Basic stats: COMPLETE Column stats: PARTIAL + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 258 Data size: 22915 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 258 Data size: 22915 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.value +FROM +src1 xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND + y.key > 20) yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.value +FROM +src1 xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND + y.key > 20) yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 +146 val_146 +150 val_150 +66 val_66 +98 val_98 +yXzkFzMwxxoH+6e+nKoA8A== +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value +FROM +src xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND x.key > 180) yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value +FROM +src xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND x.key > 180) yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) + 
Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: y + filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: xx + filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.value +FROM +src xx +LEFT SEMI JOIN 
+(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND x.key > 180) yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.value +FROM +src xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND x.key > 180) yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +JoqlU/XtLjyBlZ8xMKANxg== +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value +FROM +src xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND x.key > 180) yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value +FROM +src xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND x.key > 180) yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: xx + filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key 
(type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: x + filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: y + filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num 
rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 165 Data size: 19360 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 346 Data size: 40656 Basic stats: COMPLETE Column stats: PARTIAL + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 380 Data size: 44721 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 380 Data size: 44721 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 181 Data size: 21296 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 346 Data size: 40656 Basic stats: COMPLETE Column stats: PARTIAL + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 380 Data size: 44721 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 380 Data size: 44721 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.value +FROM +src xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + 
WHERE x.key < 200 AND x.key > 180) yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.value +FROM +src xx +LEFT SEMI JOIN +(SELECT x.key as key + FROM src x JOIN src y ON (x.key = y.key) + WHERE x.key < 200 AND x.key > 180) yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +181	val_181 +183	val_183 +186	val_186 +187	val_187 +187	val_187 +187	val_187 +189	val_189 +190	val_190 +191	val_191 +191	val_191 +192	val_192 +193	val_193 +193	val_193 +193	val_193 +194	val_194 +195	val_195 +195	val_195 +196	val_196 +197	val_197 +197	val_197 +199	val_199 +199	val_199 +199	val_199 diff --git a/ql/src/test/results/clientpositive/correlationoptimizer11.q.out b/ql/src/test/results/clientpositive/correlationoptimizer11.q.out new file mode 100644 index 0000000000..b2c4c6f187 --- /dev/null +++ b/ql/src/test/results/clientpositive/correlationoptimizer11.q.out @@ -0,0 +1,578 @@ +PREHOOK: query: CREATE TABLE part_table_n1(key string, value string) PARTITIONED BY (partitionId int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_table_n1 +POSTHOOK: query: CREATE TABLE part_table_n1(key string, value string) PARTITIONED BY (partitionId int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_table_n1 +PREHOOK: query: INSERT OVERWRITE TABLE part_table_n1 PARTITION (partitionId=1) + SELECT key, value FROM src ORDER BY key, value LIMIT 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@part_table_n1@partitionid=1 +POSTHOOK: query: INSERT OVERWRITE TABLE part_table_n1 PARTITION (partitionId=1) + SELECT key, value FROM src ORDER BY key, value LIMIT 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@part_table_n1@partitionid=1 +POSTHOOK: Lineage: part_table_n1 
PARTITION(partitionid=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: part_table_n1 PARTITION(partitionid=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE part_table_n1 PARTITION (partitionId=2) + SELECT key, value FROM src1 ORDER BY key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@part_table_n1@partitionid=2 +POSTHOOK: query: INSERT OVERWRITE TABLE part_table_n1 PARTITION (partitionId=2) + SELECT key, value FROM src1 ORDER BY key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@part_table_n1@partitionid=2 +POSTHOOK: Lineage: part_table_n1 PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: part_table_n1 PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN +SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 1 AND + y.partitionId = 2 +GROUP BY x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table_n1 +PREHOOK: Input: default@part_table_n1@partitionid=1 +PREHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 1 AND + y.partitionId = 2 +GROUP BY x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table_n1 +POSTHOOK: Input: default@part_table_n1@partitionid=1 +POSTHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + 
filterExpr: ((partitionid = 1) and key is not null) (type: boolean) + Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: y + filterExpr: ((partitionid = 2) and key is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 665 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: 
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 7 Data size: 665 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 665 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 665 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 1 AND + y.partitionId = 2 +GROUP BY x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table_n1 +PREHOOK: Input: default@part_table_n1@partitionid=1 +PREHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 1 AND + y.partitionId = 2 +GROUP BY x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table_n1 +POSTHOOK: Input: default@part_table_n1@partitionid=1 +POSTHOOK: Input: default@part_table_n1@partitionid=2 +#### A 
masked pattern was here #### +128 3 +146 2 +150 1 +PREHOOK: query: EXPLAIN +SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 1 AND + y.partitionId = 2 +GROUP BY x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table_n1 +PREHOOK: Input: default@part_table_n1@partitionid=1 +PREHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 1 AND + y.partitionId = 2 +GROUP BY x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table_n1 +POSTHOOK: Input: default@part_table_n1@partitionid=1 +POSTHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: ((partitionid = 1) and key is not null) (type: boolean) + Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: y + filterExpr: ((partitionid = 2) and key is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: 
Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 125 Data size: 10850 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 11935 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 137 Data size: 11935 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 68 Data size: 5923 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 68 Data size: 5923 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 1 AND + y.partitionId = 2 +GROUP BY x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table_n1 +PREHOOK: Input: default@part_table_n1@partitionid=1 +PREHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.key AS key, count(1) AS 
cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 1 AND + y.partitionId = 2 +GROUP BY x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table_n1 +POSTHOOK: Input: default@part_table_n1@partitionid=1 +POSTHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +128 3 +146 2 +150 1 +PREHOOK: query: EXPLAIN +SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 2 AND + y.partitionId = 2 +GROUP BY x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table_n1 +PREHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 2 AND + y.partitionId = 2 +GROUP BY x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table_n1 +POSTHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: ((partitionid = 2) and key is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: y 
+ filterExpr: ((partitionid = 2) and key is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 2 AND + y.partitionId = 2 +GROUP BY x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table_n1 +PREHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 2 AND + y.partitionId = 2 +GROUP BY x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table_n1 +POSTHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### + 100 +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 +PREHOOK: query: EXPLAIN +SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 2 AND + y.partitionId = 2 +GROUP BY x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table_n1 +PREHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 2 AND + y.partitionId = 2 +GROUP BY x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table_n1 +POSTHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: ((partitionid = 2) and key is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: y + filterExpr: ((partitionid = 2) and key is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 50 Data size: 4300 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 4730 Basic stats: COMPLETE Column stats: NONE + Mux Operator + 
Statistics: Num rows: 55 Data size: 4730 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 2322 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 2322 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 2 AND + y.partitionId = 2 +GROUP BY x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table_n1 +PREHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.key AS key, count(1) AS cnt +FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) +WHERE x.partitionId = 2 AND + y.partitionId = 2 +GROUP BY x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table_n1 +POSTHOOK: Input: default@part_table_n1@partitionid=2 +#### A masked pattern was here #### + 100 +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 diff --git a/ql/src/test/results/clientpositive/correlationoptimizer12.q.out b/ql/src/test/results/clientpositive/correlationoptimizer12.q.out new file mode 100644 index 0000000000..713fb63fdf --- /dev/null +++ b/ql/src/test/results/clientpositive/correlationoptimizer12.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: EXPLAIN SELECT xx.key, xx.cnt, yy.key, yy.cnt +FROM +(SELECT x.key as key, count(x.value) OVER (PARTITION BY x.key) AS cnt FROM src x) xx +JOIN +(SELECT y.key as key, count(y.value) OVER (PARTITION BY 
y.key) AS cnt FROM src1 y) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT xx.key, xx.cnt, yy.key, yy.cnt +FROM +(SELECT x.key as key, count(x.value) OVER (PARTITION BY x.key) AS cnt FROM src x) xx +JOIN +(SELECT y.key as key, count(y.value) OVER (PARTITION BY y.key) AS cnt FROM src1 y) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 223000 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + 
alias: count_window_0 + arguments: _col1 + name: count + window function: GenericUDAFCountEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 500 Data size: 223000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), count_window_0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2350 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 39 Data size: 7371 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 39 Data size: 7371 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map 
Reduce + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: count_window_0 + arguments: _col1 + name: count + window function: GenericUDAFCountEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), count_window_0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 2350 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + 
Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/correlationoptimizer13.q.out b/ql/src/test/results/clientpositive/correlationoptimizer13.q.out new file mode 100644 index 0000000000..1930687207 --- /dev/null +++ b/ql/src/test/results/clientpositive/correlationoptimizer13.q.out @@ -0,0 +1,182 @@ +PREHOOK: query: CREATE TABLE tmp(c1 INT, c2 INT, c3 STRING, c4 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp +POSTHOOK: query: CREATE TABLE tmp(c1 INT, c2 INT, c3 STRING, c4 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp +PREHOOK: query: INSERT OVERWRITE TABLE tmp +SELECT x.key, y.key, x.value, y.value FROM src x JOIN src y ON (x.key = y.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp +POSTHOOK: query: INSERT OVERWRITE TABLE tmp +SELECT x.key, y.key, x.value, y.value FROM src x JOIN src y ON (x.key = y.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp +POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN +SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c3, x.c1) xx +JOIN +(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c3, x1.c1) yy +ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp +#### A masked pattern was here #### +POSTHOOK: query: 
EXPLAIN +SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c3, x.c1) xx +JOIN +(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c3, x1.c1) yy +ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: ((c1 < 120) and c3 is not null) (type: boolean) + Statistics: Num rows: 1028 Data size: 97660 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((c1 < 120) and c3 is not null) (type: boolean) + Statistics: Num rows: 248 Data size: 23560 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: c1 (type: int), c3 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + TableScan + alias: x1 + filterExpr: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) + Statistics: Num rows: 1028 Data size: 101772 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) + Statistics: Num rows: 198 Data size: 19602 Basic stats: COMPLETE Column stats: COMPLETE + Select 
Operator + expressions: c1 (type: int), c3 (type: string) + outputColumnNames: c1, c3 + Statistics: Num rows: 198 Data size: 19602 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: c1 (type: int), c3 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 223 Data size: 22969 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 248 Data size: 25544 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 248 Data size: 25544 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey5 
(type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/correlationoptimizer14.q.out b/ql/src/test/results/clientpositive/correlationoptimizer14.q.out new file mode 100644 index 0000000000..2962ea9a7b --- /dev/null +++ b/ql/src/test/results/clientpositive/correlationoptimizer14.q.out @@ -0,0 +1,1634 @@ +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + 
Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 39 
Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x SORT BY key) xx +JOIN +(SELECT y.key as key, 
y.value as value FROM src1 y SORT BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x SORT BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y SORT BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + 
Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: 
string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: 
COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: 
default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 val_128 128 +128 val_128 128 +128 val_128 128 +146 val_146 146 val_146 +146 val_146 146 val_146 +150 val_150 150 val_150 +213 val_213 213 val_213 +213 val_213 213 val_213 +224 val_224 224 +224 val_224 224 +238 val_238 238 val_238 +238 val_238 238 val_238 +255 val_255 255 val_255 +255 val_255 255 val_255 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +278 val_278 278 val_278 +278 val_278 278 val_278 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +369 val_369 369 +369 val_369 369 +369 val_369 369 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +66 val_66 66 val_66 +98 val_98 98 val_98 +98 val_98 98 val_98 +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src 
+POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 525 Data size: 93375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: KEY.reducesinkkey0 (type: 
string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 525 Data size: 93450 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 1050 Data size: 186900 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1155 Data size: 205590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1155 Data size: 205590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 525 Data size: 93450 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 1050 Data size: 186900 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1155 Data size: 205590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1155 Data size: 205590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx 
+JOIN +(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 val_128 128 +128 val_128 128 +128 val_128 128 +146 val_146 146 val_146 +146 val_146 146 val_146 +150 val_150 150 val_150 +213 val_213 213 val_213 +213 val_213 213 val_213 +224 val_224 224 +224 val_224 224 +238 val_238 238 val_238 +238 val_238 238 val_238 +255 val_255 255 val_255 +255 val_255 255 val_255 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +278 val_278 278 val_278 +278 val_278 278 val_278 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +369 val_369 369 +369 val_369 369 +369 val_369 369 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +66 val_66 66 val_66 +98 val_98 98 val_98 +98 val_98 98 val_98 +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key DESC) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key DESC) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE 
BY key SORT BY key DESC) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key DESC) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: - + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce 
partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: - + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + 
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x ORDER BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y ORDER BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x ORDER BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y ORDER BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output 
Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx +JOIN +(SELECT y.key as key, y.value as value 
FROM src1 y Cluster BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 val_128 128 +128 val_128 128 +128 val_128 128 +146 val_146 146 val_146 +146 val_146 146 val_146 +150 val_150 150 val_150 +213 val_213 213 val_213 +213 val_213 213 val_213 +224 val_224 224 +224 val_224 224 +238 val_238 238 val_238 +238 val_238 238 val_238 +255 val_255 255 val_255 +255 val_255 255 val_255 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +278 val_278 278 val_278 +278 val_278 278 val_278 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +369 val_369 369 +369 val_369 369 +369 val_369 369 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +66 val_66 66 val_66 +98 val_98 98 val_98 +98 val_98 98 val_98 
+PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + 
expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 525 Data size: 93375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 525 Data size: 93450 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 1050 Data size: 186900 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1155 Data size: 205590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1155 Data size: 205590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 525 Data size: 93450 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 1050 Data size: 186900 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + 
Statistics: Num rows: 1155 Data size: 205590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1155 Data size: 205590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx +JOIN +(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 val_128 128 +128 val_128 128 +128 val_128 128 +146 val_146 146 val_146 +146 val_146 146 val_146 +150 val_150 150 val_150 +213 val_213 213 val_213 +213 val_213 213 val_213 +224 val_224 224 +224 val_224 224 +238 val_238 238 val_238 +238 val_238 238 val_238 +255 val_255 255 val_255 +255 val_255 255 val_255 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +278 val_278 278 val_278 +278 val_278 278 val_278 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +369 val_369 369 +369 val_369 369 +369 val_369 369 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +66 
val_66 66 val_66 +98 val_98 98 val_98 +98 val_98 98 val_98 +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx +JOIN +(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx +JOIN +(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 4896 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 18 Data size: 4896 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Group By 
Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx +JOIN +(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx +JOIN +(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 val_128 128 1 +128 val_128 128 1 +128 val_128 128 1 +146 val_146 146 1 +146 val_146 146 1 +150 val_150 150 1 +213 val_213 213 1 +213 
val_213 213 1 +224 val_224 224 1 +224 val_224 224 1 +238 val_238 238 1 +238 val_238 238 1 +255 val_255 255 1 +255 val_255 255 1 +273 val_273 273 1 +273 val_273 273 1 +273 val_273 273 1 +278 val_278 278 1 +278 val_278 278 1 +311 val_311 311 1 +311 val_311 311 1 +311 val_311 311 1 +369 val_369 369 1 +369 val_369 369 1 +369 val_369 369 1 +401 val_401 401 1 +401 val_401 401 1 +401 val_401 401 1 +401 val_401 401 1 +401 val_401 401 1 +406 val_406 406 1 +406 val_406 406 1 +406 val_406 406 1 +406 val_406 406 1 +66 val_66 66 1 +98 val_98 98 1 +98 val_98 98 1 +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx +JOIN +(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx +JOIN +(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 512 Data size: 90128 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 512 Data size: 91648 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 524 Data size: 92776 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 576 Data size: 102053 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 576 Data size: 102053 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 524 Data size: 92776 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 576 Data size: 102053 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 576 Data size: 102053 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx +JOIN +(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy +ON (xx.key=yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value +FROM +(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx +JOIN +(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy +ON (xx.key=yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out 
b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out new file mode 100644 index 0000000000..607b617da1 --- /dev/null +++ b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out @@ -0,0 +1,436 @@ +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt, yy.key +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +JOIN src yy +ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt, yy.key +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +JOIN src yy +ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 
2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + File 
Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + alias: yy + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: 
bigint), _col2 (type: string) + null sort order: zzz + sort order: +++ + Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.cnt, yy.key +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +JOIN src yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +JOIN src yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 128 +128 1 128 +128 1 128 +146 1 146 +146 1 146 +150 1 150 +213 1 213 +213 1 213 +224 1 224 +224 1 224 +238 1 238 +238 1 238 +255 1 255 +255 1 255 +273 1 273 +273 1 273 +273 1 273 +278 1 278 +278 1 278 +311 1 311 +311 1 311 +311 1 311 +369 1 369 +369 1 369 +369 1 369 +401 1 401 +401 1 401 +401 1 401 +401 1 401 +401 1 401 +406 1 406 +406 1 406 +406 1 406 +406 1 406 +66 1 66 +98 1 98 +98 1 98 +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt, yy.key +FROM +(SELECT x.key as key, 
count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +JOIN src yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt, yy.key +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +JOIN src yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + 
Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: yy + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 550 Data size: 47800 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 605 Data size: 52580 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 605 Data size: 52580 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 26246 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 852 Data size: 74046 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 937 Data size: 81450 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 937 Data size: 81450 Basic stats: COMPLETE Column stats: 
NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Mux Operator + Statistics: Num rows: 852 Data size: 74046 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 937 Data size: 81450 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 937 Data size: 81450 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.cnt, yy.key +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +JOIN src yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key +FROM +(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx +JOIN src yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 128 +128 1 128 +128 1 128 +146 1 146 +146 1 146 +150 1 150 +213 1 213 +213 1 213 +224 1 224 +224 1 224 +238 1 238 +238 1 238 +255 1 255 +255 1 255 +273 1 273 +273 1 273 +273 1 273 +278 1 278 +278 1 278 +311 1 311 +311 1 311 +311 1 311 +369 1 369 +369 1 369 +369 1 369 +401 1 401 +401 1 401 +401 1 401 +401 1 401 +401 1 401 +406 1 406 +406 1 406 +406 1 406 +406 1 406 +66 1 66 +98 1 98 +98 1 98 diff --git 
a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out new file mode 100644 index 0000000000..2e9e6027ae --- /dev/null +++ b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -0,0 +1,888 @@ +PREHOOK: query: CREATE TABLE T1_n19(key INT, val STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_n19 +POSTHOOK: query: CREATE TABLE T1_n19(key INT, val STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_n19 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1_n19 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_n19 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1_n19 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_n19 +PREHOOK: query: CREATE TABLE T2_n11(key INT, val STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2_n11 +POSTHOOK: query: CREATE TABLE T2_n11(key INT, val STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2_n11 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T2_n11 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2_n11 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T2_n11 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2_n11 +PREHOOK: query: CREATE TABLE T3_n5(key INT, val STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3_n5 +POSTHOOK: query: CREATE TABLE T3_n5(key INT, val STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3_n5 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO 
TABLE T3_n5 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3_n5 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE T3_n5 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3_n5 +PREHOOK: query: CREATE TABLE T4_n1(key INT, val STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T4_n1 +POSTHOOK: query: CREATE TABLE T4_n1(key INT, val STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T4_n1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' INTO TABLE T4_n1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t4_n1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' INTO TABLE T4_n1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t4_n1 +PREHOOK: query: CREATE TABLE dest_co1(key INT, val STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_co1 +POSTHOOK: query: CREATE TABLE dest_co1(key INT, val STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_co1 +PREHOOK: query: CREATE TABLE dest_co2(key INT, val STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_co2 +POSTHOOK: query: CREATE TABLE dest_co2(key INT, val STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_co2 +PREHOOK: query: CREATE TABLE dest_co3(key INT, val STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_co3 +POSTHOOK: query: CREATE TABLE dest_co3(key INT, val STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_co3 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_co1 +SELECT b.key, d.val +FROM +(SELECT x.key, 
x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b +JOIN +(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d +ON b.key = d.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n19 +PREHOOK: Input: default@t2_n11 +PREHOOK: Input: default@t3_n5 +PREHOOK: Input: default@t4_n1 +PREHOOK: Output: default@dest_co1 +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_co1 +SELECT b.key, d.val +FROM +(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b +JOIN +(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d +ON b.key = d.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n19 +POSTHOOK: Input: default@t2_n11 +POSTHOOK: Input: default@t3_n5 +POSTHOOK: Input: default@t4_n1 +POSTHOOK: Output: default@dest_co1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-5 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co1 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: n + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: m + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + 
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + +PREHOOK: query: INSERT OVERWRITE TABLE dest_co1 +SELECT b.key, d.val +FROM +(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b +JOIN +(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d +ON b.key = d.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n19 +PREHOOK: Input: default@t2_n11 +PREHOOK: Input: default@t3_n5 +PREHOOK: Input: default@t4_n1 +PREHOOK: Output: default@dest_co1 +POSTHOOK: query: INSERT OVERWRITE TABLE dest_co1 +SELECT b.key, d.val +FROM +(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b +JOIN +(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d +ON b.key = d.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n19 +POSTHOOK: Input: default@t2_n11 +POSTHOOK: Input: default@t3_n5 +POSTHOOK: Input: default@t4_n1 +POSTHOOK: Output: default@dest_co1 +POSTHOOK: Lineage: dest_co1.key SIMPLE [(t1_n19)x.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest_co1.val SIMPLE [(t4_n1)n.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_co2 +SELECT b.key, d.val +FROM +(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b +JOIN 
+(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d +ON b.key = d.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n19 +PREHOOK: Input: default@t2_n11 +PREHOOK: Input: default@t3_n5 +PREHOOK: Input: default@t4_n1 +PREHOOK: Output: default@dest_co2 +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_co2 +SELECT b.key, d.val +FROM +(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b +JOIN +(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d +ON b.key = d.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n19 +POSTHOOK: Input: default@t2_n11 +POSTHOOK: Input: default@t3_n5 +POSTHOOK: Input: default@t4_n1 +POSTHOOK: Output: default@dest_co2 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: n + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: m + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 4 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 220 Basic stats: 
COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 8 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 4 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 8 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + +PREHOOK: query: INSERT OVERWRITE TABLE dest_co2 +SELECT b.key, d.val +FROM +(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b +JOIN +(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d +ON b.key = d.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n19 +PREHOOK: Input: default@t2_n11 +PREHOOK: Input: default@t3_n5 +PREHOOK: Input: default@t4_n1 +PREHOOK: Output: default@dest_co2 +POSTHOOK: query: INSERT OVERWRITE TABLE dest_co2 +SELECT b.key, d.val +FROM +(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b +JOIN +(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d +ON b.key = d.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n19 +POSTHOOK: Input: default@t2_n11 +POSTHOOK: Input: default@t3_n5 +POSTHOOK: Input: default@t4_n1 +POSTHOOK: Output: default@dest_co2 +POSTHOOK: Lineage: dest_co2.key SIMPLE [(t1_n19)x.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest_co2.val SIMPLE [(t4_n1)n.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_co3 +SELECT b.key, d.val +FROM +(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b +JOIN +(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d +ON b.key = d.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n19 +PREHOOK: Input: default@t2_n11 +PREHOOK: Input: default@t3_n5 +PREHOOK: Input: default@t4_n1 +PREHOOK: Output: default@dest_co3 
+POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_co3 +SELECT b.key, d.val +FROM +(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b +JOIN +(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d +ON b.key = d.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n19 +POSTHOOK: Input: default@t2_n11 +POSTHOOK: Input: default@t3_n5 +POSTHOOK: Input: default@t4_n1 +POSTHOOK: Output: default@dest_co3 +STAGE DEPENDENCIES: + Stage-14 is a root stage + Stage-10 depends on stages: Stage-14 + Stage-9 depends on stages: Stage-10, Stage-11 , consists of Stage-12, Stage-13, Stage-2 + Stage-12 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-12 + Stage-0 depends on stages: Stage-2, Stage-7, Stage-8 + Stage-3 depends on stages: Stage-0 + Stage-13 has a backup stage: Stage-2 + Stage-8 depends on stages: Stage-13 + Stage-2 + Stage-15 is a root stage + Stage-11 depends on stages: Stage-15 + +STAGE PLANS: + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:y + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:y + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-9 + Conditional Operator + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co3 + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co3 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + + Stage: Stage-13 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co3 + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: 
+ Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co3 + + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_2:$hdt$_3:m + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_2:$hdt$_3:m + TableScan + alias: m + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + alias: n + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + 
Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + +PREHOOK: query: INSERT OVERWRITE TABLE dest_co3 +SELECT b.key, d.val +FROM +(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b +JOIN +(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d +ON b.key = d.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n19 +PREHOOK: Input: default@t2_n11 +PREHOOK: Input: default@t3_n5 +PREHOOK: Input: default@t4_n1 +PREHOOK: Output: default@dest_co3 +POSTHOOK: query: INSERT OVERWRITE TABLE dest_co3 +SELECT b.key, d.val +FROM +(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b +JOIN +(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d +ON b.key = d.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n19 +POSTHOOK: Input: default@t2_n11 +POSTHOOK: Input: default@t3_n5 +POSTHOOK: Input: default@t4_n1 +POSTHOOK: Output: default@dest_co3 +POSTHOOK: Lineage: dest_co3.key SIMPLE [(t1_n19)x.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest_co3.val SIMPLE [(t4_n1)n.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_co1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT 
SUM(HASH(key)), SUM(HASH(val)) FROM dest_co1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_co1 +#### A masked pattern was here #### +8409 7619696771 +PREHOOK: query: SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_co2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_co2 +#### A masked pattern was here #### +8409 7619696771 +PREHOOK: query: SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_co3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_co3 +#### A masked pattern was here #### +8409 7619696771 diff --git a/ql/src/test/results/clientpositive/correlationoptimizer7.q.out b/ql/src/test/results/clientpositive/correlationoptimizer7.q.out new file mode 100644 index 0000000000..bd0f301a4b --- /dev/null +++ b/ql/src/test/results/clientpositive/correlationoptimizer7.q.out @@ -0,0 +1,776 @@ +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-9 is a root stage + Stage-2 depends on stages: Stage-9 + Stage-8 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 + +STAGE 
PLANS: + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:y + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:y + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + value 
expressions: _col1 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:yy + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:yy + TableScan + alias: yy + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 3 128 +146 2 146 val_146 +150 1 150 val_150 +213 2 213 val_213 +224 2 224 +238 2 238 val_238 +255 2 255 val_255 +273 3 273 val_273 +278 2 278 val_278 +311 3 311 val_311 +369 3 369 +401 5 401 val_401 +406 4 406 val_406 +66 1 66 val_66 +98 2 98 val_98 +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:y + Fetch Operator + limit: -1 + 
Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:y + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: yy + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + 
keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 27 Data size: 4565 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 29 Data size: 4755 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 5230 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 31 Data size: 5230 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Mux Operator + Statistics: Num rows: 29 Data size: 4755 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 5230 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 31 Data size: 5230 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 3 128 +146 2 146 val_146 +150 1 150 val_150 +213 2 213 val_213 +224 2 224 +238 2 238 val_238 +255 2 255 val_255 +273 3 273 val_273 +278 2 278 val_278 +311 3 311 val_311 +369 3 369 +401 5 401 val_401 +406 4 406 val_406 +66 1 66 val_66 +98 2 98 val_98 +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-9 is a root stage + Stage-2 depends on stages: Stage-9 + Stage-8 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:y + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:y + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 190 
Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:yy + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:yy + TableScan + alias: yy + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + 
compressed: false + Statistics: Num rows: 3 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 3 128 +146 2 146 val_146 +150 1 150 val_150 +213 2 213 val_213 +224 2 224 +238 2 238 val_238 +255 2 255 val_255 +273 3 273 val_273 +278 2 278 val_278 +311 3 311 val_311 +369 3 369 +401 5 401 val_401 +406 4 406 val_406 +66 1 66 val_66 +98 2 98 val_98 +PREHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here 
#### +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:y + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:y + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: yy + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 27 Data size: 4565 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 29 Data size: 4755 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 5230 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 31 Data size: 5230 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Mux Operator + Statistics: Num rows: 29 Data size: 4755 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 5230 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 31 Data size: 5230 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src x JOIN src1 y ON (x.key = y.key) + GROUP BY x.key) xx +JOIN src1 yy +ON xx.key=yy.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 3 128 +146 2 146 val_146 +150 1 150 val_150 +213 2 213 val_213 +224 2 224 +238 2 238 val_238 +255 2 255 val_255 +273 3 273 val_273 +278 2 278 val_278 +311 3 311 val_311 +369 3 369 +401 5 401 val_401 +406 4 406 val_406 +66 1 66 val_66 +98 2 98 val_98 diff --git a/ql/src/test/results/clientpositive/correlationoptimizer8.q.out b/ql/src/test/results/clientpositive/correlationoptimizer8.q.out new file mode 100644 index 0000000000..c9c1e921b2 --- /dev/null +++ b/ql/src/test/results/clientpositive/correlationoptimizer8.q.out @@ -0,0 +1,1259 @@ 
+PREHOOK: query: EXPLAIN +SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: 
bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + alias: x + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: x1 + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) + Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 
(type: string) + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 3 +146 val_146 2 +150 val_150 1 +213 val_213 2 +224 2 +238 val_238 2 +255 val_255 2 +273 val_273 3 +278 val_278 2 +311 val_311 3 +369 3 +401 val_401 5 +406 val_406 4 +PREHOOK: query: EXPLAIN +SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 
+JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + alias: x1 + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and 
(UDFToDouble(key) > 100.0D)) (type: boolean) + Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + alias: x + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 171 Data size: 18085 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: 
COMPLETE + Mux Operator + Statistics: Num rows: 319 Data size: 32145 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 319 Data size: 32145 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Mux Operator + Statistics: Num rows: 319 Data size: 32145 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 3 +146 val_146 2 
+150 val_150 1 +213 val_213 2 +224 2 +238 val_238 2 +255 val_255 2 +273 val_273 3 +278 val_278 2 +311 val_311 3 +369 3 +401 val_401 5 +406 val_406 4 +PREHOOK: query: EXPLAIN +SELECT subq1.key, subq1.cnt, x.key, x.value +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value +) subq1 +LEFT OUTER JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT subq1.key, subq1.cnt, x.key, x.value +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value +) subq1 +LEFT OUTER JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 20.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 
83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 87 Data size: 8273 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 8273 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 87 Data size: 8273 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87 Data size: 8273 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 113 Data size: 15686 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 113 Data size: 15686 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: x1 + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + 
Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT subq1.key, subq1.cnt, x.key, x.value +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value +) subq1 +LEFT OUTER JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT subq1.key, subq1.cnt, x.key, x.value +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value +) subq1 +LEFT OUTER JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + 3 + 3 + 3 + 3 + 3 val_165 + 3 val_193 + 3 val_265 + 3 val_27 + 3 val_409 + 3 val_484 +0 3 NULL NULL +10 1 NULL NULL +11 1 NULL NULL +12 2 NULL NULL +15 2 NULL NULL +17 1 NULL NULL +18 2 NULL NULL +19 1 NULL NULL +2 1 NULL NULL +4 1 NULL NULL +5 3 NULL NULL +8 1 NULL NULL +9 1 NULL NULL +val_146 1 NULL NULL +val_150 1 NULL NULL +val_213 1 NULL NULL +val_238 1 NULL NULL +val_255 1 NULL NULL +val_273 1 NULL NULL +val_278 1 NULL NULL +val_311 1 NULL NULL +val_401 1 NULL NULL +val_406 1 NULL NULL +PREHOOK: query: EXPLAIN +SELECT subq1.key, 
subq1.cnt, x.key, x.value +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value +) subq1 +LEFT OUTER JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT subq1.key, subq1.cnt, x.key, x.value +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value +) subq1 +LEFT OUTER JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 20.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + alias: x1 + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: 
COMPLETE + Filter Operator + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 112 Data size: 12648 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 8051 Basic stats: COMPLETE Column stats: 
COMPLETE + Union + Statistics: Num rows: 166 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 278 Data size: 28750 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 305 Data size: 31625 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 305 Data size: 31625 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 8051 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 166 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 278 Data size: 28750 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 305 Data size: 31625 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 305 Data size: 31625 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Mux Operator + Statistics: Num rows: 278 Data size: 28750 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 
_col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 305 Data size: 31625 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 305 Data size: 31625 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT subq1.key, subq1.cnt, x.key, x.value +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value +) subq1 +LEFT OUTER JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT subq1.key, subq1.cnt, x.key, x.value +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value +) subq1 +LEFT OUTER JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + 3 + 3 + 3 + 3 + 3 val_165 + 3 val_193 + 3 val_265 + 3 val_27 + 3 val_409 + 3 val_484 +0 3 NULL NULL +10 1 NULL NULL +11 1 NULL NULL +12 2 NULL NULL +15 2 NULL NULL +17 1 NULL NULL +18 2 NULL NULL +19 1 NULL NULL +2 1 NULL NULL +4 1 NULL NULL +5 3 NULL NULL +8 1 NULL NULL +9 1 NULL NULL +val_146 1 NULL NULL +val_150 1 NULL NULL +val_213 1 NULL NULL +val_238 1 NULL NULL +val_255 1 NULL NULL +val_273 1 NULL NULL +val_278 1 NULL NULL +val_311 1 NULL NULL +val_401 1 NULL NULL +val_406 1 NULL NULL +PREHOOK: query: EXPLAIN +SELECT x.key, 
x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key, x1.value +) subq1 +JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key, x1.value +) subq1 +JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) + Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution 
mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + alias: x + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: x1 + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) + Statistics: Num rows: 148 Data size: 26344 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 74 Data size: 13764 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + 
Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 74 Data size: 13764 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 74 Data size: 13764 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +WARNING: Comparing a bigint and a string may result in a loss of precision. 
+PREHOOK: query: EXPLAIN +SELECT subq1.key, subq1.value, x.key, x.value +FROM +( SELECT cast(x.key as INT) as key, count(1) as value from src x where x.key < 20 group by x.key + UNION ALL + SELECT count(1) as key, cast(x1.key as INT) as value from src x1 where x1.key > 100 group by x1.key +) subq1 +FULL OUTER JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT subq1.key, subq1.value, x.key, x.value +FROM +( SELECT cast(x.key as INT) as key, count(1) as value from src x where x.key < 20 group by x.key + UNION ALL + SELECT count(1) as key, cast(x1.key as INT) as value from src x1 where x1.key > 100 group by x1.key +) subq1 +FULL OUTER JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 20.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 
(type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToLong(UDFToInteger(_col0)) (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 166 Data size: 2656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint), UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 3984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: double) + Statistics: Num rows: 166 Data size: 3984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 166 Data size: 2656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint), UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 3984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: double) + 
Statistics: Num rows: 166 Data size: 3984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + TableScan + alias: x + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: double) + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col2 (type: double) + 1 _col2 (type: double) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 241 Data size: 9741 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 241 Data size: 9741 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 241 Data size: 9741 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: x1 + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num 
rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint), UDFToLong(UDFToInteger(_col0)) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/correlationoptimizer9.q.out b/ql/src/test/results/clientpositive/correlationoptimizer9.q.out new file mode 100644 index 0000000000..20a0bc08b6 --- /dev/null +++ b/ql/src/test/results/clientpositive/correlationoptimizer9.q.out @@ -0,0 +1,708 @@ +PREHOOK: query: CREATE TABLE tmp_n2(c1 INT, c2 INT, c3 STRING, c4 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp_n2 +POSTHOOK: query: CREATE TABLE tmp_n2(c1 INT, c2 INT, c3 STRING, c4 STRING) +POSTHOOK: type: 
CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_n2 +PREHOOK: query: INSERT OVERWRITE TABLE tmp_n2 +SELECT x.key, y.key, x.value, y.value FROM src x JOIN src y ON (x.key = y.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp_n2 +POSTHOOK: query: INSERT OVERWRITE TABLE tmp_n2 +SELECT x.key, y.key, x.value, y.value FROM src x JOIN src y ON (x.key = y.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp_n2 +POSTHOOK: Lineage: tmp_n2.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp_n2.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp_n2.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tmp_n2.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN +SELECT xx.key, yy.key, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx +JOIN +(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy +ON (xx.key = yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, yy.key, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx +JOIN +(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy +ON (xx.key = yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: ((c1 < 120) and (c1 > 100)) (type: boolean) + Statistics: Num rows: 1028 Data 
size: 4112 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((c1 < 120) and (c1 > 100)) (type: boolean) + Statistics: Num rows: 198 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: c1 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: 
int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 59 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 59 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: x1 + filterExpr: ((c2 > 100) and (c2 < 120)) (type: boolean) + Statistics: Num rows: 1028 Data size: 4112 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((c2 > 100) and (c2 < 120)) (type: boolean) + Statistics: Num rows: 198 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: c2 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, yy.key, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx +JOIN +(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy +ON (xx.key = yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, yy.key, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx +JOIN +(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy +ON (xx.key = yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +103 103 4 4 +104 104 4 4 +105 105 1 1 +111 111 1 1 +113 113 4 4 +114 114 1 1 +116 116 1 1 +118 118 4 4 +119 119 9 9 +PREHOOK: query: EXPLAIN +SELECT xx.key, yy.key, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx +JOIN +(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy +ON (xx.key = yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key, yy.key, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx +JOIN +(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy +ON (xx.key = yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + 
TableScan + alias: x + filterExpr: ((c1 < 120) and (c1 > 100)) (type: boolean) + Statistics: Num rows: 1028 Data size: 4112 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((c1 < 120) and (c1 > 100)) (type: boolean) + Statistics: Num rows: 198 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: c1 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + alias: x1 + filterExpr: ((c2 > 100) and (c2 < 120)) (type: boolean) + Statistics: Num rows: 1028 Data size: 4112 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((c2 > 100) and (c2 < 120)) (type: boolean) + Statistics: Num rows: 198 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: c2 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 118 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 708 
Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 118 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 129 Data size: 1557 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 129 Data size: 1557 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 1557 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 118 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 129 Data size: 1557 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 129 Data size: 1557 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 1557 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key, yy.key, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx +JOIN +(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy +ON (xx.key = yy.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key, yy.key, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx +JOIN +(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy +ON (xx.key = yy.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +103 103 4 4 +104 104 4 4 +105 105 1 1 +111 111 1 1 +113 113 4 4 +114 114 1 1 +116 116 1 1 +118 118 4 4 +119 119 9 9 +PREHOOK: query: EXPLAIN +SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx +JOIN +(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy +ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx +JOIN +(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy +ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root 
stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: ((c1 < 120) and c3 is not null) (type: boolean) + Statistics: Num rows: 1028 Data size: 97660 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((c1 < 120) and c3 is not null) (type: boolean) + Statistics: Num rows: 248 Data size: 23560 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: c1 (type: int), c3 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 124 Data size: 12772 Basic 
stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: x1 + filterExpr: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) + Statistics: Num rows: 1028 Data size: 101772 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) + Statistics: Num rows: 198 Data size: 19602 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: int), c3 (type: string) + outputColumnNames: c1, c3 + Statistics: Num rows: 198 Data size: 19602 Basic stats: COMPLETE Column stats: 
COMPLETE + Group By Operator + aggregations: count() + keys: c1 (type: int), c3 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx +JOIN +(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy +ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx +JOIN +(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) 
yy +ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +103 val_103 103 val_103 4 4 +104 val_104 104 val_104 4 4 +105 val_105 105 val_105 1 1 +111 val_111 111 val_111 1 1 +113 val_113 113 val_113 4 4 +114 val_114 114 val_114 1 1 +116 val_116 116 val_116 1 1 +118 val_118 118 val_118 4 4 +119 val_119 119 val_119 9 9 +PREHOOK: query: EXPLAIN +SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx +JOIN +(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy +ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx +JOIN +(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy +ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: ((c1 < 120) and c3 is not null) (type: boolean) + Statistics: Num rows: 1028 Data size: 97660 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((c1 < 120) and c3 is not null) (type: boolean) + Statistics: Num rows: 248 Data size: 23560 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: c1 (type: int), c3 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: 
Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + TableScan + alias: x1 + filterExpr: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) + Statistics: Num rows: 1028 Data size: 101772 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) + Statistics: Num rows: 198 Data size: 19602 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: int), c3 (type: string) + outputColumnNames: c1, c3 + Statistics: Num rows: 198 Data size: 19602 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: c1 (type: int), c3 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 223 Data size: 22969 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 248 Data size: 
25544 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE + Mux Operator + Statistics: Num rows: 248 Data size: 25544 Basic stats: COMPLETE Column stats: COMPLETE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + Statistics: Num rows: 272 Data size: 28098 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx +JOIN +(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy +ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt +FROM +(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx +JOIN +(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy +ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_n2 +#### A masked pattern was here #### +103 val_103 103 val_103 4 4 +104 val_104 104 val_104 4 4 +105 val_105 105 val_105 1 1 +111 val_111 111 val_111 1 1 +113 val_113 113 val_113 4 4 +114 val_114 114 val_114 1 1 +116 val_116 116 val_116 1 1 +118 val_118 118 val_118 4 4 +119 val_119 119 val_119 9 9 diff --git a/ql/src/test/results/clientpositive/cp_sel.q.out b/ql/src/test/results/clientpositive/cp_sel.q.out new file mode 100644 index 0000000000..471ff8067c --- /dev/null +++ b/ql/src/test/results/clientpositive/cp_sel.q.out @@ -0,0 +1,242 @@ +PREHOOK: query: explain +select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 order by 1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: explain +select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 order by 1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + filterExpr: (11.0D = 11.0D) (type: boolean) + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'hello' (type: string), 'world' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: 
COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 order by 1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 order by 1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +0 val_0 hello world +PREHOOK: query: create table testpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by(key) sorted by(key) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testpartbucket +POSTHOOK: query: create table testpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by(key) sorted by(key) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testpartbucket +PREHOOK: query: explain +insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Output: default@testpartbucket +POSTHOOK: query: explain +insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + filterExpr: (11.0D = 11.0D) (type: boolean) + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), 'hello' (type: string), 'world' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testpartbucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, 
value, ds, hr + Statistics: Num rows: 1000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testpartbucket + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.testpartbucket + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: 
struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Output: default@testpartbucket +POSTHOOK: query: insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@testpartbucket@ds=hello/hr=world +POSTHOOK: Lineage: testpartbucket PARTITION(ds=hello,hr=world).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testpartbucket PARTITION(ds=hello,hr=world).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from testpartbucket limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@testpartbucket +PREHOOK: Input: default@testpartbucket@ds=hello/hr=world +#### A masked pattern was here #### +POSTHOOK: query: select * from testpartbucket limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testpartbucket +POSTHOOK: Input: default@testpartbucket@ds=hello/hr=world +#### A masked pattern was here #### +0 val_0 hello world +0 val_0 
hello world +0 val_0 hello world +PREHOOK: query: drop table testpartbucket +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@testpartbucket +PREHOOK: Output: default@testpartbucket +POSTHOOK: query: drop table testpartbucket +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@testpartbucket +POSTHOOK: Output: default@testpartbucket diff --git a/ql/src/test/results/clientpositive/llap/create_view_disable_cbo.q.out b/ql/src/test/results/clientpositive/create_view_disable_cbo.q.out similarity index 94% rename from ql/src/test/results/clientpositive/llap/create_view_disable_cbo.q.out rename to ql/src/test/results/clientpositive/create_view_disable_cbo.q.out index 8f74bdd2b1..800d2fc149 100644 --- a/ql/src/test/results/clientpositive/llap/create_view_disable_cbo.q.out +++ b/ql/src/test/results/clientpositive/create_view_disable_cbo.q.out @@ -53,7 +53,7 @@ POSTHOOK: Input: cdh_82023_repro_db@data POSTHOOK: Output: cdh_82023_repro_db@background POSTHOOK: Output: database:cdh_82023_repro_db POSTHOOK: Lineage: background.text SIMPLE [(data)xouter.FieldSchema(name:text, type:string, comment:null), ] -Warning: Shuffle Join MERGEJOIN[42][tables = [xouter, sq_1_notin_nullcheck]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join JOIN[24][tables = [xouter, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM `cdh_82023_repro_db`.`background` PREHOOK: type: QUERY PREHOOK: Input: cdh_82023_repro_db@background @@ -85,7 +85,7 @@ POSTHOOK: Input: cdh_82023_repro_db@data POSTHOOK: Output: cdh_82023_repro_db@foreground POSTHOOK: Output: database:cdh_82023_repro_db POSTHOOK: Lineage: foreground.text SIMPLE [(data)xouter.FieldSchema(name:text, type:string, comment:null), ] -Warning: Shuffle Join MERGEJOIN[43][tables = [xouter, sq_1_notin_nullcheck]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join JOIN[24][tables = [xouter, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * 
FROM `cdh_82023_repro_db`.`foreground` PREHOOK: type: QUERY PREHOOK: Input: cdh_82023_repro_db@background diff --git a/ql/src/test/results/clientpositive/cross_join_merge.q.out b/ql/src/test/results/clientpositive/cross_join_merge.q.out new file mode 100644 index 0000000000..54e68cce20 --- /dev/null +++ b/ql/src/test/results/clientpositive/cross_join_merge.q.out @@ -0,0 +1,670 @@ +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select src1.key from src src1 join src src2 join src src3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select src1.key from src src1 join src src2 join src src3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator 
Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 125000000 Data size: 10875000000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 125000000 Data size: 10875000000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select src1.key from src src1 join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select src1.key 
from src src1 join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num 
rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src3 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select src1.key from src src1 join src src2 join src src3 where src1.key=src2.key and src1.key=src3.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select src1.key from src src1 join src src2 join src src3 where src1.key=src2.key and src1.key=src3.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + 
Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src3 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src3 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + 
Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + TableScan + alias: src2 + filterExpr: (UDFToDouble(key) = 5.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) = 5.0D) (type: boolean) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + 
keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 197750 Data size: 17204250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 197750 Data size: 17204250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select src1.key from src src1 left outer join src src2 join src src3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select src1.key from src src1 left outer join src src2 join src src3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic 
stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 125000000 Data size: 10875000000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 125000000 Data size: 10875000000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select src1.key from 
src src1 left outer join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select src1.key from src src1 left outer join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data 
size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src3 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE 
+ table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/ctas_colname.q.out b/ql/src/test/results/clientpositive/ctas_colname.q.out similarity index 53% rename from ql/src/test/results/clientpositive/llap/ctas_colname.q.out rename to ql/src/test/results/clientpositive/ctas_colname.q.out index f897f06669..213a80986a 100644 --- a/ql/src/test/results/clientpositive/llap/ctas_colname.q.out +++ b/ql/src/test/results/clientpositive/ctas_colname.q.out @@ -12,91 +12,70 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@summary STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 Stage-0 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-3 + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0D) (type: double), concat(value, value) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 7400 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 20 
Data size: 7400 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _col3 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: double), VALUE._col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0D) (type: double), concat(value, value) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 7400 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 20 Data size: 7400 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 7400 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.summary - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _col3 (type: string) - outputColumnNames: col1, col2, col3, col4 - Statistics: Num rows: 20 Data size: 7400 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll') - 
minReductionHashAggr: 0.95 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _col3 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: double), VALUE._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20 Data size: 7400 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 7400 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 7400 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.summary + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _col3 (type: string) + outputColumnNames: col1, col2, col3, col4 + Statistics: Num rows: 20 Data size: 7400 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) - mode: mergepartial + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll') + 
minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### Stage: Stage-4 Create Table @@ -106,7 +85,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -114,11 +93,29 @@ STAGE PLANS: Column Types: string, string, double, string Table: default.summary - Stage: Stage-0 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: create table summary as select *, key + 1, concat(value, value) from src limit 20 PREHOOK: type: CREATETABLE_AS_SELECT @@ -213,106 +210,85 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@x4 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 Stage-0 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-3 + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - null sort order: az - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + null sort order: az + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: 
vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 4475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 4475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.x4 Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string, _col1: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS LAST - partition by: _col0 - raw input shape: - window functions: - window 
function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), rank_window_0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 4475 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 25 Data size: 4475 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.x4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - outputColumnNames: col1, col2, col3 - Statistics: Num rows: 25 Data size: 4475 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') - minReductionHashAggr: 0.96 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 
1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: col1, col2, col3 + Statistics: Num rows: 25 Data size: 4475 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### Stage: Stage-4 Create Table @@ -322,7 +298,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -330,11 +306,29 @@ STAGE PLANS: Column Types: string, string, int Table: default.x4 - Stage: Stage-0 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: create table x4 as select *, rank() over(partition by key order by value) as rr from src1 PREHOOK: type: CREATETABLE_AS_SELECT @@ -433,127 +427,116 @@ POSTHOOK: Output: default@x5 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-0, Stage-4 + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - null sort order: az - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), 
KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 223000 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string, _col1: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS LAST - partition by: _col0 - raw input shape: - window functions: - window function definition - alias: lead_window_0 - arguments: _col0, 1 - name: lead - window function: GenericUDAFLeadEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 500 Data size: 223000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), lead_window_0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) - outputColumnNames: _col0, _col1, _col2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + null sort order: az + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE 
Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 223000 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: lead_window_0 + arguments: _col0, 1 + name: lead + window function: GenericUDAFLeadEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 500 Data size: 223000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), lead_window_0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.x5 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: col1, col2, col3 - Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: 
compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') - minReductionHashAggr: 0.95 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.x5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + 
outputColumnNames: col1, col2, col3 + Statistics: Num rows: 20 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-5 Create Table columns: key string, value string, lead1 string name: default.x5 @@ -569,11 +552,29 @@ STAGE PLANS: Column Types: string, string, string Table: default.x5 - Stage: Stage-0 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output 
Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: create table x5 as select *, lead(key,1) over(partition by key order by value) as lead1 from src limit 20 PREHOOK: type: CREATETABLE_AS_SELECT @@ -666,73 +667,79 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@x6 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-8 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-8 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0D) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.x6 - Select Operator - expressions: _col0 
(type: string), _col1 (type: string), _col2 (type: double) - outputColumnNames: col1, col2, col3 - Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') - minReductionHashAggr: 0.96 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0D) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.x6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + outputColumnNames: col1, col2, col3 + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-7 + Conditional Operator Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-8 Create Table columns: key string, value string, _c1 double name: default.x6 @@ -740,7 +747,7 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -748,7 +755,31 @@ STAGE PLANS: Column Types: string, string, double Table: default.x6 - Stage: Stage-0 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.x6 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.x6 + + Stage: Stage-6 Move Operator files: hdfs directory: true @@ -850,95 +881,74 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@x7 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 Stage-0 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-3 + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - 
aggregations: count(value) - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial + aggregations: count(value) + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.x7 - Select Operator - 
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: col1, col2, col3 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.x7 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: col1, col2, col3 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### Stage: Stage-4 Create Table @@ -948,7 +958,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -956,11 +966,29 @@ STAGE PLANS: Column Types: string, string, bigint Table: default.x7 - Stage: Stage-0 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: 
mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: create table x7 as select * from (select *, count(value) from src group by key, value) a PREHOOK: type: CREATETABLE_AS_SELECT @@ -995,7 +1023,7 @@ Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c1\":\"true\",\"_col0\":\"true\",\"_col1\":\"true\"}} bucketing_version 2 - numFiles 2 + numFiles 1 numRows 309 rawDataSize 3891 totalSize 4200 @@ -1342,95 +1370,74 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@x8 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 Stage-0 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-3 + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 9.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 9.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(value) - keys: key (type: string), value (type: string) - 
minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 9.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 9.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial + aggregations: count(value) + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.x8 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: col1, col2, col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce 
partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') - minReductionHashAggr: 0.9879518 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.x8 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: col1, col2, col3 + Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### Stage: Stage-4 Create Table @@ -1440,7 +1447,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -1448,11 +1455,29 @@ STAGE PLANS: Column Types: string, string, bigint Table: default.x8 - Stage: Stage-0 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: 
Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: create table x8 as select * from (select *, count(value) from src group by key, value having key < 9) a PREHOOK: type: CREATETABLE_AS_SELECT @@ -1487,7 +1512,7 @@ Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c1\":\"true\",\"_col0\":\"true\",\"_col1\":\"true\"}} bucketing_version 2 - numFiles 2 + numFiles 1 numRows 5 rawDataSize 45 totalSize 50 @@ -1530,102 +1555,81 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@x9 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 Stage-0 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-3 + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 9.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 9.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: max(value) - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 
Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 9.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 9.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: max(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial + aggregations: max(value) + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
default.x9 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: col1, col2 - Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') - minReductionHashAggr: 0.9879518 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.x9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: col1, col2 + Statistics: Num rows: 83 Data size: 22493 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### Stage: Stage-4 Create Table @@ -1635,7 +1639,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -1643,11 +1647,29 @@ STAGE PLANS: Column Types: string, string Table: default.x9 - Stage: Stage-0 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: create table x9 as select * from (select max(value),key from src group by key having key < 9 AND max(value) IS NOT NULL) a PREHOOK: type: CREATETABLE_AS_SELECT @@ -1680,7 +1702,7 @@ Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c0\":\"true\",\"key\":\"true\"}} bucketing_version 2 - numFiles 2 + numFiles 1 numRows 5 rawDataSize 35 totalSize 40 diff --git a/ql/src/test/results/clientpositive/llap/ctas_uses_database_location.q.out b/ql/src/test/results/clientpositive/ctas_uses_database_location.q.out similarity index 50% rename from ql/src/test/results/clientpositive/llap/ctas_uses_database_location.q.out rename to ql/src/test/results/clientpositive/ctas_uses_database_location.q.out index dc9096ee00..a497af69e9 100644 --- a/ql/src/test/results/clientpositive/llap/ctas_uses_database_location.q.out +++ b/ql/src/test/results/clientpositive/ctas_uses_database_location.q.out @@ -26,73 +26,79 @@ POSTHOOK: Output: database:db1 POSTHOOK: Output: db1@table_db1 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-8 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-8 + Stage-3 + Stage-5 + Stage-6 
depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: db1.table_db1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: col1, col2 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Map Reduce + Map Operator Tree: + TableScan + alias: src + 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: db1.table_db1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: col1, col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Dependency Collection + Stage: Stage-7 + Conditional Operator Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-8 Create Table columns: key string, value string name: db1.table_db1 @@ -100,7 +106,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -108,7 +114,31 @@ STAGE PLANS: Column Types: string, string Table: db1.table_db1 - Stage: Stage-0 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: db1.table_db1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: db1.table_db1 + + Stage: Stage-6 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/cte_6.q.out b/ql/src/test/results/clientpositive/cte_6.q.out new file mode 100644 index 0000000000..f5bc5606d2 --- /dev/null +++ b/ql/src/test/results/clientpositive/cte_6.q.out @@ -0,0 +1,94 @@ +PREHOOK: query: explain +with Q1 as ( select key from sRc where key = '5') +select CPS.key from Q1 CPS +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### 
+POSTHOOK: query: explain +with Q1 as ( select key from sRc where key = '5') +select CPS.key from Q1 CPS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '5') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '5') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '5' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +with Q1 as ( select key from q2 where key = '5'), +Q2 as ( select key from sRc where key = '5') +select CPS.key from Q1 CPS +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +with Q1 as ( select key from q2 where key = '5'), +Q2 as ( select key from sRc where key = '5') +select CPS.key from Q1 CPS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '5') (type: boolean) + Statistics: Num rows: 500 Data size: 
43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '5') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '5' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/cte_join.q.out b/ql/src/test/results/clientpositive/cte_join.q.out similarity index 62% rename from ql/src/test/results/clientpositive/llap/cte_join.q.out rename to ql/src/test/results/clientpositive/cte_join.q.out index 46c0ffffd3..4211b8b1b9 100644 --- a/ql/src/test/results/clientpositive/llap/cte_join.q.out +++ b/ql/src/test/results/clientpositive/cte_join.q.out @@ -165,73 +165,59 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - filterExpr: ((b = 'c') and a is not null) (type: boolean) + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + filterExpr: ((b = 'c') and a is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((b = 'c') and a is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: 
_col0 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((b = 'c') and a is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: t1 - filterExpr: a is not null (type: boolean) + TableScan + alias: t1 + filterExpr: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: varchar(100)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: a is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: varchar(100)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 
Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: varchar(100)) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: varchar(100)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/database_location.q.out b/ql/src/test/results/clientpositive/database_location.q.out similarity index 64% rename from ql/src/test/results/clientpositive/llap/database_location.q.out rename to ql/src/test/results/clientpositive/database_location.q.out index 
801ed4184f..f0567076a7 100644 --- a/ql/src/test/results/clientpositive/llap/database_location.q.out +++ b/ql/src/test/results/clientpositive/database_location.q.out @@ -142,107 +142,3 @@ POSTHOOK: query: SHOW TABLES POSTHOOK: type: SHOWTABLES POSTHOOK: Input: database:db2 table_db2 -PREHOOK: query: EXPLAIN -CREATE DATABASE db3 -#### A masked pattern was here #### -PREHOOK: type: CREATEDATABASE -PREHOOK: Output: database:db3 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -CREATE DATABASE db3 -#### A masked pattern was here #### -POSTHOOK: type: CREATEDATABASE -POSTHOOK: Output: database:db3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Create Database -#### A masked pattern was here #### - name: db3 - -PREHOOK: query: CREATE DATABASE db3 -#### A masked pattern was here #### -PREHOOK: type: CREATEDATABASE -PREHOOK: Output: database:db3 -#### A masked pattern was here #### -POSTHOOK: query: CREATE DATABASE db3 -#### A masked pattern was here #### -POSTHOOK: type: CREATEDATABASE -POSTHOOK: Output: database:db3 -#### A masked pattern was here #### -PREHOOK: query: DESCRIBE DATABASE db3 -PREHOOK: type: DESCDATABASE -PREHOOK: Input: database:db3 -POSTHOOK: query: DESCRIBE DATABASE db3 -POSTHOOK: type: DESCDATABASE -POSTHOOK: Input: database:db3 -#### A masked pattern was here #### -PREHOOK: query: EXPLAIN -#### A masked pattern was here #### -PREHOOK: type: ALTERDATABASE_LOCATION -PREHOOK: Output: database:db3 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -#### A masked pattern was here #### -POSTHOOK: type: ALTERDATABASE_LOCATION -POSTHOOK: Output: database:db3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 -#### A masked pattern was here #### - name: db3 -#### A masked pattern was here #### - -#### A masked pattern was here #### -PREHOOK: type: ALTERDATABASE_LOCATION -PREHOOK: Output: database:db3 
-#### A masked pattern was here #### -POSTHOOK: type: ALTERDATABASE_LOCATION -POSTHOOK: Output: database:db3 -#### A masked pattern was here #### -PREHOOK: query: DESCRIBE DATABASE db3 -PREHOOK: type: DESCDATABASE -PREHOOK: Input: database:db3 -POSTHOOK: query: DESCRIBE DATABASE db3 -POSTHOOK: type: DESCDATABASE -POSTHOOK: Input: database:db3 -#### A masked pattern was here #### -PREHOOK: query: EXPLAIN -#### A masked pattern was here #### -PREHOOK: type: ALTERDATABASE_LOCATION -PREHOOK: Output: database:db3 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -#### A masked pattern was here #### -POSTHOOK: type: ALTERDATABASE_LOCATION -POSTHOOK: Output: database:db3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 -#### A masked pattern was here #### - name: db3 -#### A masked pattern was here #### - -#### A masked pattern was here #### -PREHOOK: type: ALTERDATABASE_LOCATION -PREHOOK: Output: database:db3 -#### A masked pattern was here #### -POSTHOOK: type: ALTERDATABASE_LOCATION -POSTHOOK: Output: database:db3 -#### A masked pattern was here #### -PREHOOK: query: DESCRIBE DATABASE db3 -PREHOOK: type: DESCDATABASE -PREHOOK: Input: database:db3 -POSTHOOK: query: DESCRIBE DATABASE db3 -POSTHOOK: type: DESCDATABASE -POSTHOOK: Input: database:db3 -#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/database_properties.q.out b/ql/src/test/results/clientpositive/database_properties.q.out similarity index 100% rename from ql/src/test/results/clientpositive/llap/database_properties.q.out rename to ql/src/test/results/clientpositive/database_properties.q.out diff --git a/ql/src/test/results/clientpositive/llap/date_2.q.out b/ql/src/test/results/clientpositive/date_2.q.out similarity index 100% rename from ql/src/test/results/clientpositive/llap/date_2.q.out rename to ql/src/test/results/clientpositive/date_2.q.out index 722f65a304..402dae8e05 100644 --- 
a/ql/src/test/results/clientpositive/llap/date_2.q.out +++ b/ql/src/test/results/clientpositive/date_2.q.out @@ -330,7 +330,6 @@ POSTHOOK: Input: default@date_2 #### A masked pattern was here #### 2010-10-20 11 2010-10-21 12 -2010-10-29 12 2010-10-22 11 2010-10-23 12 2010-10-24 12 @@ -338,6 +337,7 @@ POSTHOOK: Input: default@date_2 2010-10-26 13 2010-10-27 11 2010-10-28 12 +2010-10-29 12 2010-10-30 11 2010-10-31 8 PREHOOK: query: drop table date_2 diff --git a/ql/src/test/results/clientpositive/llap/date_serde.q.out b/ql/src/test/results/clientpositive/date_serde.q.out similarity index 100% rename from ql/src/test/results/clientpositive/llap/date_serde.q.out rename to ql/src/test/results/clientpositive/date_serde.q.out index 540a8f0e5f..f3985b0bf2 100644 --- a/ql/src/test/results/clientpositive/llap/date_serde.q.out +++ b/ql/src/test/results/clientpositive/date_serde.q.out @@ -215,7 +215,6 @@ POSTHOOK: Input: default@date_serde_regex #### A masked pattern was here #### 2010-10-20 11 2010-10-21 12 -2010-10-29 12 2010-10-22 11 2010-10-23 12 2010-10-24 12 @@ -223,6 +222,7 @@ POSTHOOK: Input: default@date_serde_regex 2010-10-26 13 2010-10-27 11 2010-10-28 12 +2010-10-29 12 2010-10-30 11 2010-10-31 8 PREHOOK: query: create table date_serde_lb ( diff --git a/ql/src/test/results/clientpositive/llap/db_ddl_explain.q.out b/ql/src/test/results/clientpositive/db_ddl_explain.q.out similarity index 100% rename from ql/src/test/results/clientpositive/llap/db_ddl_explain.q.out rename to ql/src/test/results/clientpositive/db_ddl_explain.q.out diff --git a/ql/src/test/results/clientpositive/llap/decimal_join2.q.out b/ql/src/test/results/clientpositive/decimal_join2.q.out similarity index 56% rename from ql/src/test/results/clientpositive/llap/decimal_join2.q.out rename to ql/src/test/results/clientpositive/decimal_join2.q.out index 0f07f09d6d..59d0672132 100644 --- a/ql/src/test/results/clientpositive/llap/decimal_join2.q.out +++ 
b/ql/src/test/results/clientpositive/decimal_join2.q.out @@ -52,91 +52,87 @@ POSTHOOK: Input: default@decimal_3_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: decimal(38,18)), value (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: decimal(38,18)) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: decimal(38,18)) - Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: decimal(38,18)), value (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key 
expressions: _col0 (type: decimal(38,18)) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: decimal(38,18)) - Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: decimal(38,18)) - 1 _col0 (type: decimal(38,18)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: decimal(38,18)), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: decimal(38,18)), _col1 (type: int), _col2 (type: decimal(38,18)), _col3 (type: int) - null sort order: zzzz - sort order: ++++ - Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: + key expressions: _col0 (type: decimal(38,18)) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: decimal(38,18)) + Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: 
key is not null (type: boolean) + Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(38,18)), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: decimal(38,18)), KEY.reducesinkkey3 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: key (type: decimal(38,18)), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(38,18)) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: decimal(38,18)) + Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(38,18)) + 1 _col0 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: decimal(38,18)), _col1 (type: int), _col2 (type: decimal(38,18)), _col3 
(type: int) + null sort order: zzzz + sort order: ++++ + Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(38,18)), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: decimal(38,18)), KEY.reducesinkkey3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -228,84 +224,77 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_3_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: decimal(38,18)), value (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 
_col0 (type: decimal(38,18)) - 1 _col0 (type: decimal(38,18)) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: decimal(38,18)), _col1 (type: int), _col2 (type: decimal(38,18)), _col3 (type: int) - null sort order: zzzz - sort order: ++++ - Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: decimal(38,18)), value (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: decimal(38,18)) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: decimal(38,18)) - Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:a + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE + Select 
Operator + expressions: key (type: decimal(38,18)), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: decimal(38,18)) + 1 _col0 (type: decimal(38,18)) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(38,18)), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: decimal(38,18)), KEY.reducesinkkey3 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + expressions: key (type: decimal(38,18)), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 37 Data size: 4292 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(38,18)) + 1 _col0 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: decimal(38,18)), _col1 (type: int), _col2 (type: decimal(38,18)), _col3 (type: int) + null sort order: zzzz + sort order: ++++ + Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Local Work: + Map Reduce 
Local Work + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(38,18)), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: decimal(38,18)), KEY.reducesinkkey3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 48 Data size: 11136 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/decimal_precision.q.out b/ql/src/test/results/clientpositive/decimal_precision.q.out similarity index 89% rename from ql/src/test/results/clientpositive/llap/decimal_precision.q.out rename to ql/src/test/results/clientpositive/decimal_precision.q.out index b650ebfab5..8aea2dca3f 100644 --- a/ql/src/test/results/clientpositive/llap/decimal_precision.q.out +++ b/ql/src/test/results/clientpositive/decimal_precision.q.out @@ -536,53 +536,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: decimal_precision_n0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: dec (type: decimal(20,10)) - outputColumnNames: dec - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(dec), count(dec) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort 
order: - Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_precision_n0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dec (type: decimal(20,10)) + outputColumnNames: dec + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1) - mode: mergepartial + aggregations: sum(dec), count(dec) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: CAST( (_col0 / _col1) AS decimal(24,14)) (type: decimal(24,14)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10)) - outputColumnNames: _col0, _col1 + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( (_col0 / _col1) AS decimal(24,14)) (type: decimal(24,14)), CAST( _col0 
AS decimal(30,10)) (type: decimal(30,10)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/decimal_precision2.q.out b/ql/src/test/results/clientpositive/decimal_precision2.q.out similarity index 78% rename from ql/src/test/results/clientpositive/llap/decimal_precision2.q.out rename to ql/src/test/results/clientpositive/decimal_precision2.q.out index 2531fcff31..3cba039026 100644 --- a/ql/src/test/results/clientpositive/llap/decimal_precision2.q.out +++ b/ql/src/test/results/clientpositive/decimal_precision2.q.out @@ -17,9 +17,11 @@ STAGE PLANS: TableScan alias: _dummy_table Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 100.001 (type: decimal(6,3)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select 100.000BD @@ -41,9 +43,11 @@ STAGE PLANS: TableScan alias: _dummy_table Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 100 (type: decimal(3,0)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select 0.000BD @@ -65,9 +69,11 @@ STAGE PLANS: TableScan alias: _dummy_table Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 0 (type: decimal(1,0)) 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select 0.100BD @@ -89,9 +95,11 @@ STAGE PLANS: TableScan alias: _dummy_table Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 0.1 (type: decimal(1,1)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select 0.010BD @@ -113,9 +121,11 @@ STAGE PLANS: TableScan alias: _dummy_table Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 0.01 (type: decimal(2,2)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select cast(0.010 as decimal(6,3)) @@ -137,9 +147,11 @@ STAGE PLANS: TableScan alias: _dummy_table Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 0.01 (type: decimal(6,3)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select 0.09765625BD * 0.09765625BD * 0.0125BD * 578992BD @@ -161,9 +173,11 @@ STAGE PLANS: TableScan alias: _dummy_table Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 69.0212249755859375 (type: decimal(29,20)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select 0.09765625BD * 0.09765625BD * 0.0125BD * 578992BD diff --git a/ql/src/test/results/clientpositive/llap/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out similarity index 57% rename from 
ql/src/test/results/clientpositive/llap/decimal_stats.q.out rename to ql/src/test/results/clientpositive/decimal_stats.q.out index e47ddbb61f..0571fae33d 100644 --- a/ql/src/test/results/clientpositive/llap/decimal_stats.q.out +++ b/ql/src/test/results/clientpositive/decimal_stats.q.out @@ -75,53 +75,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: decimal_1_n1 - Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: t (type: decimal(4,2)) - null sort order: z - Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE - top n: 100 - Select Operator - expressions: t (type: decimal(4,2)), u (type: decimal(5,0)), v (type: decimal(10,0)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 56112 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: decimal(4,2)) - null sort order: z - sort order: + - Statistics: Num rows: 500 Data size: 56112 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0)) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(5,0)), VALUE._col1 (type: decimal(10,0)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 56112 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 22512 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 22512 Basic 
stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_1_n1 + Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: t (type: decimal(4,2)), u (type: decimal(5,0)), v (type: decimal(10,0)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + null sort order: z + sort order: + + Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0)) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(5,0)), VALUE._col1 (type: decimal(10,0)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 22512 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 22512 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/decimal_udf.q.out b/ql/src/test/results/clientpositive/decimal_udf.q.out similarity index 71% rename from ql/src/test/results/clientpositive/llap/decimal_udf.q.out rename to 
ql/src/test/results/clientpositive/decimal_udf.q.out index c2d1bc28d5..f00ef023e4 100644 --- a/ql/src/test/results/clientpositive/llap/decimal_udf.q.out +++ b/ql/src/test/results/clientpositive/decimal_udf.q.out @@ -42,9 +42,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key + key) (type: decimal(21,10)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key + key FROM DECIMAL_UDF @@ -111,9 +113,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key + CAST( value AS decimal(10,0))) (type: decimal(21,10)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key + value FROM DECIMAL_UDF @@ -180,9 +184,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) + (UDFToDouble(value) / 2.0D)) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key + (value/2) FROM DECIMAL_UDF @@ -249,9 +255,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key + '1.0' FROM DECIMAL_UDF @@ -318,9 +326,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column 
stats: NONE Select Operator expressions: (key - key) (type: decimal(21,10)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key - key FROM DECIMAL_UDF @@ -387,9 +397,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key - CAST( value AS decimal(10,0))) (type: decimal(21,10)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key - value FROM DECIMAL_UDF @@ -456,9 +468,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) - (UDFToDouble(value) / 2.0D)) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key - (value/2) FROM DECIMAL_UDF @@ -525,9 +539,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) - 1.0D) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key - '1.0' FROM DECIMAL_UDF @@ -594,9 +610,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key * key) (type: decimal(38,17)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key * key FROM DECIMAL_UDF @@ -664,11 +682,14 @@ STAGE PLANS: TableScan alias: decimal_udf filterExpr: ((key * CAST( value AS decimal(10,0))) > 0) 
(type: boolean) + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key * CAST( value AS decimal(10,0))) > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: decimal(20,10)), value (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key, value FROM DECIMAL_UDF where key * value > 0 @@ -720,9 +741,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key * CAST( value AS decimal(10,0))) (type: decimal(31,10)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key * value FROM DECIMAL_UDF @@ -789,9 +812,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) * (UDFToDouble(value) / 2.0D)) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key * (value/2) FROM DECIMAL_UDF @@ -858,9 +883,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) * 2.0D) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key * '2.0' FROM DECIMAL_UDF @@ -927,11 +954,14 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / 0) (type: 
decimal(22,12)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key / 0 FROM DECIMAL_UDF limit 1 @@ -961,11 +991,14 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: null (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: SELECT key / NULL FROM DECIMAL_UDF limit 1 @@ -996,11 +1029,14 @@ STAGE PLANS: TableScan alias: decimal_udf filterExpr: (key <> 0) (type: boolean) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key <> 0) (type: boolean) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / key) (type: decimal(38,18)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 @@ -1064,11 +1100,14 @@ STAGE PLANS: TableScan alias: decimal_udf filterExpr: (value <> 0) (type: boolean) + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (value <> 0) (type: boolean) + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / CAST( value AS decimal(10,0))) (type: decimal(31,21)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key / value FROM DECIMAL_UDF WHERE value is not null and 
value <> 0 @@ -1122,11 +1161,14 @@ STAGE PLANS: TableScan alias: decimal_udf filterExpr: (value <> 0) (type: boolean) + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (value <> 0) (type: boolean) + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) / (UDFToDouble(value) / 2.0D)) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 @@ -1179,9 +1221,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (1.0D + (UDFToDouble(key) / 2.0D)) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT 1 + (key / '2.0') FROM DECIMAL_UDF @@ -1248,9 +1292,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: abs(key) (type: decimal(20,10)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT abs(key) FROM DECIMAL_UDF @@ -1309,75 +1355,76 @@ POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 1 Data 
size: 116 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: decimal(20,10)), value (type: int) - outputColumnNames: key, value - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(key), count(key) - keys: value (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: decimal(20,10)), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial + aggregations: sum(key), count(key) + keys: value (type: int) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), CAST( (_col1 / _col2) AS decimal(24,14)) (type: decimal(24,14)), _col1 (type: decimal(30,10)) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: 
_col0 (type: int) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10)) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(24,14)), VALUE._col2 (type: decimal(30,10)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), CAST( (_col1 / _col2) AS decimal(24,14)) (type: decimal(24,14)), _col1 (type: decimal(30,10)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10)) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(24,14)), VALUE._col2 (type: decimal(30,10)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1428,9 +1475,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (- key) (type: decimal(20,10)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT -key FROM DECIMAL_UDF @@ -1497,9 +1546,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: decimal(20,10)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT +key FROM DECIMAL_UDF @@ -1566,9 +1617,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: 
Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ceil(key) (type: decimal(11,0)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT CEIL(key) FROM DECIMAL_UDF @@ -1635,9 +1688,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: floor(key) (type: decimal(11,0)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT FLOOR(key) FROM DECIMAL_UDF @@ -1704,9 +1759,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: round(key, 2) (type: decimal(13,2)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT ROUND(key, 2) FROM DECIMAL_UDF @@ -1773,9 +1830,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: power(key, 2) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF @@ -1842,9 +1901,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ((key + 1) % (key / 2)) (type: decimal(22,12)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF @@ -1907,57 +1968,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was 
here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: int), key (type: decimal(20,10)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3), sum(_col2), count(_col1) - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: int), key (type: decimal(20,10)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial + aggregations: sum(_col3), sum(_col2), count(_col1) + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, 
_col2, _col3 Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1973,21 +2025,21 @@ 
POSTHOOK: query: SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-1234567890 0.0 0.0 -1255 0.0 0.0 +-11 0.0 0.0 -1 0.0 0.0 0 0.22561046704494161 0.05090008284023669 -3 0.0 0.0 -10 0.0 0.0 -100 0.0 0.0 -200 0.0 0.0 --1234567890 0.0 0.0 --11 0.0 0.0 1 0.05928102563215448 0.003514240000000157 2 0.0 0.0 +3 0.0 0.0 4 0.0 0.0 +10 0.0 0.0 20 0.0 0.0 +100 0.0 0.0 124 0.0 0.0 125 0.0 0.0 +200 0.0 0.0 4400 0.0 0.0 1234567890 0.0 0.0 PREHOOK: query: EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value @@ -2004,57 +2056,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: int), key (type: decimal(20,10)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3), sum(_col2), count(_col1) - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator 
Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: int), key (type: decimal(20,10)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial + aggregations: sum(_col3), sum(_col2), count(_col1) + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2) + keys: KEY._col0 (type: int) + 
mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2070,21 +2113,21 @@ POSTHOOK: query: SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### +-1234567890 NULL NULL -1255 NULL NULL +-11 NULL NULL -1 0.0 0.0 0 0.23482281918556472 0.05514175641025642 -3 0.0 0.0 -10 NULL NULL -100 NULL NULL -200 NULL NULL --1234567890 NULL NULL --11 NULL NULL 1 0.06627820154470243 0.0043928000000001965 2 0.0 0.0 +3 0.0 0.0 4 NULL NULL +10 NULL NULL 20 NULL NULL +100 NULL NULL 124 NULL NULL 125 NULL NULL +200 NULL NULL 4400 NULL NULL 1234567890 NULL NULL PREHOOK: query: EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF @@ -2101,49 +2144,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key 
(type: decimal(20,10)) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: histogram_numeric(_col0, 3) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: array) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: decimal(20,10)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: histogram_numeric(VALUE._col0) - mode: mergepartial + aggregations: histogram_numeric(_col0, 3) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: array) + Reduce Operator Tree: + Group By Operator + aggregations: histogram_numeric(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 832 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2174,49 +2207,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: decimal(20,10)) - outputColumnNames: key - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(20,10)) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: decimal(20,10)) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial + aggregations: min(key) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE 
Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: decimal(20,10)) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2247,49 +2271,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: decimal(20,10)) - outputColumnNames: key - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(key) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(20,10)) - Execution mode: 
vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: decimal(20,10)) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial + aggregations: max(key) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: decimal(20,10)) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2320,49 +2335,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: decimal_udf - Statistics: 
Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: decimal(20,10)) - outputColumnNames: key - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(key) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: decimal(20,10)) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial + aggregations: count(key) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + File Output Operator 
+ compressed: false + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/decimal_udf2.q.out b/ql/src/test/results/clientpositive/decimal_udf2.q.out similarity index 55% rename from ql/src/test/results/clientpositive/llap/decimal_udf2.q.out rename to ql/src/test/results/clientpositive/decimal_udf2.q.out index 04c96d3d2f..eb98a07c8e 100644 --- a/ql/src/test/results/clientpositive/llap/decimal_udf2.q.out +++ b/ql/src/test/results/clientpositive/decimal_udf2.q.out @@ -37,22 +37,38 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf2 + filterExpr: (key = 10) (type: boolean) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = 10) (type: boolean) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: null (type: double), null (type: double), 1.4711276743037347D (type: double), -0.8390715290764524D (type: double), -0.5440211108893698D (type: double), 0.6483608274590866D (type: double), 0.17453292519943295D (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: decimal_udf2 - filterExpr: (key = 10) (type: boolean) - Filter Operator - predicate: (key = 10) (type: boolean) - Select Operator - expressions: null (type: double), null (type: double), 1.4711276743037347D (type: double), -0.8390715290764524D (type: double), -0.5440211108893698D (type: double), 0.6483608274590866D (type: double), 0.17453292519943295D (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - ListSink + ListSink PREHOOK: query: SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key) FROM DECIMAL_UDF2 WHERE key = 10 @@ -84,22 +100,38 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf2 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_udf2 + filterExpr: (key = 10) (type: boolean) + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = 10) (type: boolean) + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 22026.465794806718D (type: double), 2.302585092994046D (type: double), 2.302585092994046D (type: double), 1.0D (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0D (type: double), 3.1622776601683795D (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: decimal_udf2 - filterExpr: (key = 10) (type: boolean) - Filter Operator - predicate: (key = 10) (type: boolean) - Select Operator - expressions: 22026.465794806718D (type: double), 2.302585092994046D (type: double), 2.302585092994046D (type: double), 1.0D (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0D (type: double), 3.1622776601683795D (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - ListSink + ListSink PREHOOK: query: SELECT exp(key), ln(key), diff --git a/ql/src/test/results/clientpositive/llap/describe_database.q.out b/ql/src/test/results/clientpositive/describe_database.q.out similarity index 100% rename from ql/src/test/results/clientpositive/llap/describe_database.q.out rename to ql/src/test/results/clientpositive/describe_database.q.out diff --git a/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out similarity index 63% rename from ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out rename to ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out index 24d2460d81..ca7ce6f0e4 100644 --- a/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out @@ -77,55 +77,45 @@ POSTHOOK: Output: default@uservisits_web_text_none_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A 
masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: uservisits_web_text_none_n0 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) - outputColumnNames: sourceip, adrevenue, avgtimeonsite - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: uservisits_web_text_none_n0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) + outputColumnNames: sourceip, adrevenue, avgtimeonsite + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial + aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1512 
Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Stats Work Basic Stats Work: Column Stats Desc: @@ -147,128 +137,118 @@ POSTHOOK: Output: default@uservisits_web_text_none_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: uservisits_web_text_none_n0 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Statistics Aggregation Key Prefix: default.uservisits_web_text_none_n0/ - GatherStats: true - Select Operator - expressions: sourceip (type: string), adrevenue (type: 
float), avgtimeonsite (type: int) - outputColumnNames: sourceip, adrevenue, avgtimeonsite - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: + Map Reduce + Map Operator Tree: + TableScan + alias: uservisits_web_text_none_n0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics Aggregation Key Prefix: default.uservisits_web_text_none_n0/ + GatherStats: true + Select Operator + expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) + outputColumnNames: sourceip, adrevenue, avgtimeonsite + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### - Path -> Partition: + Path -> Partition: #### A masked pattern was here 
#### - Partition - base file name: uservisits_web_text_none_n0 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite - columns.comments - columns.types string:string:string:float:string:string:string:string:int - field.delim | + Partition + base file name: uservisits_web_text_none_n0 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite + columns.comments + columns.types string:string:string:float:string:string:string:string:int + field.delim | #### A masked pattern was here #### - name default.uservisits_web_text_none_n0 - numFiles 1 - numRows 0 - rawDataSize 0 - serialization.ddl struct uservisits_web_text_none_n0 { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 7060 + name default.uservisits_web_text_none_n0 + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct uservisits_web_text_none_n0 { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7060 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite - columns.comments - columns.types string:string:string:float:string:string:string:string:int - field.delim | + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite + columns.comments + columns.types string:string:string:float:string:string:string:string:int + field.delim | #### A masked pattern was here #### - name default.uservisits_web_text_none_n0 - numFiles 1 - numRows 0 - rawDataSize 0 - serialization.ddl struct uservisits_web_text_none_n0 { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 7060 + name default.uservisits_web_text_none_n0 + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct uservisits_web_text_none_n0 { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7060 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.uservisits_web_text_none_n0 - name: default.uservisits_web_text_none_n0 - Truncated Path -> Alias: - /uservisits_web_text_none_n0 [uservisits_web_text_none_n0] - Reducer 2 - Execution mode: llap - Needs Tagging: false - 
Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.uservisits_web_text_none_n0 + name: default.uservisits_web_text_none_n0 + Truncated Path -> Alias: + /uservisits_web_text_none_n0 [uservisits_web_text_none_n0] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + 
columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false - Stage: Stage-2 + Stage: Stage-1 Stats Work Basic Stats Work: Stats Aggregation Key Prefix: default.uservisits_web_text_none_n0/ @@ -398,55 +378,45 @@ POSTHOOK: Output: default@empty_tab_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: empty_tab_n0 - Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: double), c (type: string), d (type: boolean), e (type: binary) - outputColumnNames: a, b, c, d, e - Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map 
Operator Tree: + TableScan + alias: empty_tab_n0 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: double), c (type: string), d (type: boolean), e (type: binary) + outputColumnNames: a, b, c, d, e + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) - mode: mergepartial + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 
Data size: 2224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/distinct_groupby.q.out b/ql/src/test/results/clientpositive/distinct_groupby.q.out new file mode 100644 index 0000000000..b5c49770cb --- /dev/null +++ b/ql/src/test/results/clientpositive/distinct_groupby.q.out @@ -0,0 +1,2221 @@ +PREHOOK: query: explain select distinct key from src1 group by key,value +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct key from src1 group by key,value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: 
KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct key from src1 group by key,value +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct key from src1 group by key,value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + +128 +146 +150 +213 +224 +238 +255 +273 +278 +311 +369 +401 +406 +66 +98 +PREHOOK: query: explain select distinct count(value) from src group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct count(value) from src group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select 
Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(value) + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: 
+ Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct count(value) from src group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select distinct count(value) from src group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 +2 +3 +4 +5 +PREHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + 
mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct count(*) from src1 where key in (128,146,150) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct count(*) from src1 where key in (128,146,150) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +3 +PREHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 25 
Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +3 +PREHOOK: query: explain select distinct count(*)+1 from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct count(*)+1 from 
src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 + 1L) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct count(*)+1 from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct count(*)+1 from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +26 +PREHOOK: query: explain select 
distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort 
order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 39 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col1), count(_col3) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on 
a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +37 37 +PREHOOK: query: explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: 
count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### 
+POSTHOOK: query: select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +1 +PREHOOK: query: explain select distinct key from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct key from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct key from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct key from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + +128 +146 +150 +213 +224 +238 +255 +273 +278 +311 +369 +401 +406 +66 +98 +PREHOOK: query: explain select distinct * from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct * from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct * from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct * from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + + val_165 + val_193 + val_265 + val_27 + val_409 + val_484 +128 +146 val_146 +150 val_150 +213 val_213 +224 +238 val_238 +255 val_255 +273 val_273 +278 val_278 +311 val_311 +369 +401 val_401 +406 val_406 +66 val_66 +98 val_98 +PREHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 
564 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct count(*) from src1 where key in (128,146,150) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct count(*) from src1 where key in (128,146,150) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +1 +PREHOOK: query: explain select distinct key, count(*) from src1 where key in (128,146,150) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct key, count(*) from src1 where key in (128,146,150) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: 
bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct key, count(*) from src1 where key in (128,146,150) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct key, count(*) from src1 where key in (128,146,150) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 1 +146 1 +150 1 +PREHOOK: query: explain select distinct * from (select * from src1) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct * from (select * from src1) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: 
hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct * from (select * from src1) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct * from (select * from src1) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + + val_165 + val_193 + val_265 + val_27 + val_409 + val_484 +128 +146 val_146 +150 val_150 +213 val_213 +224 +238 val_238 +255 val_255 +273 val_273 +278 val_278 +311 val_311 +369 +401 val_401 +406 val_406 +66 val_66 +98 val_98 +PREHOOK: query: explain select distinct * from (select count(*) from src1) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct * from (select count(*) from src1) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### 
+STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct * from (select count(*) from src1) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct * from (select count(*) from src1) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +25 +PREHOOK: query: explain select distinct * from (select * from src1 where key in (128,146,150)) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct * from (select * from src1 where key in (128,146,150)) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: 
string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct * from (select * from src1 where key in (128,146,150)) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct * from (select * from src1 where key in (128,146,150)) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 +146 val_146 +150 val_150 +PREHOOK: query: explain select distinct key from (select * from src1 where key in (128,146,150)) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct key from (select * from src1 where key in (128,146,150)) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + 
outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct key from (select * from src1 where key in (128,146,150)) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct key from (select * from src1 where key in (128,146,150)) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 +146 +150 +PREHOOK: query: explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: 
(UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct * from (select count(*) from src1 where key in (128,146,150)) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct * from (select count(*) from src1 where key in (128,146,150)) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +3 +PREHOOK: query: explain select distinct sum(key) over () from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked 
pattern was here #### +POSTHOOK: query: explain select distinct sum(key) over () from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col0 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: double) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct sum(key) over () from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct sum(key) over () from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +3556.0 +PREHOOK: query: explain select distinct * from (select sum(key) over () from src1) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct * from (select sum(key) over () from src1) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map 
Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col0 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: double) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + null sort order: z + sort order: + + Map-reduce 
partition columns: _col0 (type: double) + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct * from (select sum(key) over () from src1) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct * from (select sum(key) over () from src1) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +3556.0 +PREHOOK: query: explain select distinct value, key, count(1) over (partition by value) from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct value, key, count(1) over (partition by value) from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: value (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 25 Data 
size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: count_window_0 + arguments: 1 + name: count + window function: GenericUDAFCountEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), count_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string), _col2 (type: bigint) + Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct value, key, count(1) over (partition by value) from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct value, key, count(1) over (partition by value) from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + 7 + 128 7 + 224 7 + 369 7 +val_146 146 1 +val_150 150 1 +val_165 1 +val_193 1 +val_213 213 1 +val_238 238 1 +val_255 255 1 +val_265 1 +val_27 1 +val_273 273 1 +val_278 278 1 +val_311 311 1 +val_401 401 1 +val_406 406 1 +val_409 1 +val_484 1 +val_66 66 1 +val_98 98 1 +PREHOOK: query: explain select value, key, count(1) over (partition by value) from src1 group by value, key +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select value, key, count(1) over (partition by value) from src1 group by value, key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + 
+STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 
2100 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: count_window_0 + arguments: 1 + name: count + window function: GenericUDAFCountEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), count_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select value, key, count(1) over (partition by value) from src1 group by value, key +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select value, key, count(1) over (partition by value) from src1 group by value, key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + 224 4 + 128 4 + 369 4 + 4 +val_146 146 1 +val_150 150 1 +val_165 1 +val_193 1 +val_213 213 1 +val_238 238 1 +val_255 255 1 +val_265 1 +val_27 1 +val_273 273 1 +val_278 278 1 +val_311 311 1 +val_401 401 1 +val_406 406 1 +val_409 1 +val_484 1 +val_66 66 1 +val_98 98 1 +PREHOOK: query: explain select value, key, count(1) over 
(partition by value) from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select value, key, count(1) over (partition by value) from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: value (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: count_window_0 + arguments: 1 + name: count + window function: GenericUDAFCountEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), count_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: 
Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select value, key, count(1) over (partition by value) from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select value, key, count(1) over (partition by value) from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + 7 + 7 + 7 + 7 + 128 7 + 369 7 + 224 7 +val_146 146 1 +val_150 150 1 +val_165 1 +val_193 1 +val_213 213 1 +val_238 238 1 +val_255 255 1 +val_265 1 +val_27 1 +val_273 273 1 +val_278 278 1 +val_311 311 1 +val_401 401 1 +val_406 406 1 +val_409 1 +val_484 1 +val_66 66 1 +val_98 98 1 +PREHOOK: query: explain select distinct count(*)+key from src1 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct count(*)+key from src1 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (UDFToDouble(_col1) + UDFToDouble(_col0)) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: double) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct count(*)+key from src1 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct count(*)+key from src1 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +67.0 +99.0 +129.0 +147.0 +151.0 +214.0 +225.0 +239.0 +256.0 +274.0 +279.0 +312.0 +370.0 +402.0 +407.0 +NULL +PREHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: 
z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 39 Data size: 10413 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col1), count(_col3) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + 
Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: bigint), _col2 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +1 1 +2 2 +3 3 +4 4 +5 5 +PREHOOK: query: explain select distinct * from (select distinct * from src1) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct * from (select distinct * from src1) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 12 Data size: 2100 Basic stats: 
COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct * from (select distinct * from src1) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct * from (select distinct * from src1) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + + val_165 + val_193 + val_265 + val_27 + val_409 + val_484 +128 +146 val_146 +150 val_150 +213 val_213 +224 +238 val_238 +255 val_255 +273 val_273 +278 val_278 +311 val_311 +369 +401 val_401 +406 val_406 +66 val_66 +98 val_98 diff --git a/ql/src/test/results/clientpositive/distinct_stats.q.out b/ql/src/test/results/clientpositive/distinct_stats.q.out new file mode 100644 index 0000000000..c1ebfc3d57 --- /dev/null +++ b/ql/src/test/results/clientpositive/distinct_stats.q.out @@ -0,0 +1,219 @@ +PREHOOK: query: create table t1_n11 (a string, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1_n11 +POSTHOOK: query: create table t1_n11 (a string, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1_n11 +PREHOOK: query: insert into table t1_n11 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: 
default@t1_n11 +POSTHOOK: query: insert into table t1_n11 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1_n11 +POSTHOOK: Lineage: t1_n11.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1_n11.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: analyze table t1_n11 compute statistics for columns a,b +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@t1_n11 +PREHOOK: Output: default@t1_n11 +#### A masked pattern was here #### +POSTHOOK: query: analyze table t1_n11 compute statistics for columns a,b +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@t1_n11 +POSTHOOK: Output: default@t1_n11 +#### A masked pattern was here #### +PREHOOK: query: explain +select count(distinct b) from t1_n11 group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n11 +#### A masked pattern was here #### +POSTHOOK: query: explain +select count(distinct b) from t1_n11 group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n11 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n11 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT b) + keys: a (type: string), b (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: 
_col0 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select distinct(b) from t1_n11 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n11 +#### A masked pattern was here #### +POSTHOOK: query: explain +select distinct(b) from t1_n11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n11 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n11 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: b (type: string) + outputColumnNames: b + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: b (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: 
string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a, count(*) from t1_n11 group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n11 +#### A masked pattern was here #### +POSTHOOK: query: explain +select a, count(*) from t1_n11 group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n11 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n11 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: string) + outputColumnNames: a + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: a (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + 
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table t1_n11 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1_n11 +PREHOOK: Output: default@t1_n11 +POSTHOOK: query: drop table t1_n11 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1_n11 +POSTHOOK: Output: default@t1_n11 diff --git a/ql/src/test/results/clientpositive/distinct_windowing.q.out b/ql/src/test/results/clientpositive/distinct_windowing.q.out new file mode 100644 index 0000000000..9fefb501b8 --- /dev/null +++ b/ql/src/test/results/clientpositive/distinct_windowing.q.out @@ -0,0 +1,478 @@ +PREHOOK: query: drop table over10k_n15 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k_n15 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k_n15( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k_n15 +POSTHOOK: query: create table over10k_n15( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + 
ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k_n15 +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k_n15 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k_n15 +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k_n15 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k_n15 +PREHOOK: query: explain +select distinct first_value(t) over ( partition by si order by i ) from over10k_n15 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n15 +#### A masked pattern was here #### +POSTHOOK: query: explain +select distinct first_value(t) over ( partition by si order by i ) from over10k_n15 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n15 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k_n15 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: si (type: smallint), i (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: si (type: smallint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: t (type: tinyint) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output 
shape: _col0: tinyint, _col1: smallint, _col2: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: first_value_window_0 + arguments: _col0 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: first_value_window_0 (type: tinyint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: tinyint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k_n15 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n15 +#### A masked pattern was here #### +POSTHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k_n15 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n15 +#### A masked pattern was here #### +-2 +-1 +0 +1 +2 +3 +4 +6 +7 +8 +PREHOOK: query: explain +select distinct last_value(i) over ( partition by si order by i ) +from over10k_n15 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n15 +#### A masked pattern was here #### +POSTHOOK: query: explain +select distinct last_value(i) over ( partition by si order by i ) +from over10k_n15 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n15 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k_n15 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: si (type: smallint), i (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: si (type: smallint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function 
definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: last_value_window_0 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: last_value_window_0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select distinct last_value(i) over ( partition by si order by i ) +from over10k_n15 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n15 +#### A masked pattern was here #### +POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i ) +from over10k_n15 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n15 +#### A masked pattern was here #### +65536 +65537 +65538 +65539 +65540 +65541 +65542 +65543 +65544 +65545 +PREHOOK: query: explain +select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k_n15 limit 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n15 +#### A masked pattern was here #### +POSTHOOK: query: explain +select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k_n15 limit 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n15 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k_n15 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: si (type: smallint), i (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: si (type: smallint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: t (type: tinyint) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), 
KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col1: smallint, _col2: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: last_value_window_0 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: first_value_window_1 + arguments: _col0 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: tinyint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: tinyint) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 12 Basic 
stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 50 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 50 + Processor Tree: + ListSink + +PREHOOK: query: select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k_n15 limit 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n15 +#### A masked pattern was here #### +POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k_n15 limit 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n15 +#### A masked pattern was here #### +65536 -2 +65536 2 +65536 9 +65536 12 +65536 13 +65536 18 +65536 22 +65536 23 +65536 27 +65536 37 +65536 39 +65536 42 +65536 48 +65536 55 +65536 56 +65536 58 +65536 61 +65536 69 +65536 71 +65536 73 +65536 75 +65536 78 +65536 80 +65536 83 +65536 84 +65536 88 +65536 94 +65536 104 +65536 107 +65536 108 +65536 111 +65536 114 +65536 118 +65536 119 +65536 121 +65537 4 +65537 8 +65537 9 +65537 11 +65537 18 +65537 22 +65537 25 +65537 36 +65537 51 +65537 53 +65537 54 +65537 55 +65537 56 +65537 57 +65537 59 diff --git a/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out 
b/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out new file mode 100644 index 0000000000..b08dd014c4 --- /dev/null +++ b/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out @@ -0,0 +1,841 @@ +PREHOOK: query: drop table over10k_n14 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k_n14 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k_n14( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k_n14 +POSTHOOK: query: create table over10k_n14( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k_n14 +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k_n14 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k_n14 +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k_n14 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k_n14 +PREHOOK: query: explain +select distinct first_value(t) over ( partition by si order by i ) from over10k_n14 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +POSTHOOK: query: explain +select distinct first_value(t) over ( partition by si order by i ) from over10k_n14 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + 
Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k_n14 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: si (type: smallint), i (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: si (type: smallint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: t (type: tinyint) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col1: smallint, _col2: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: first_value_window_0 + arguments: _col0 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: first_value_window_0 (type: tinyint) + outputColumnNames: first_value_window_0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: first_value_window_0 (type: tinyint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k_n14 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +POSTHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k_n14 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +-2 +-1 +0 +1 +2 +3 +4 +6 +7 +8 +PREHOOK: query: explain +select distinct last_value(i) over ( partition by si order by i ) +from over10k_n14 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +POSTHOOK: query: explain +select distinct last_value(i) over ( partition by si order by i ) +from over10k_n14 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n14 +#### A 
masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k_n14 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: si (type: smallint), i (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: si (type: smallint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: last_value_window_0 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: last_value_window_0 (type: int) + outputColumnNames: last_value_window_0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: last_value_window_0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select distinct last_value(i) over ( partition by si order by i ) +from over10k_n14 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i ) +from over10k_n14 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +65536 +65537 +65538 +65539 +65540 +65541 +65542 +65543 +65544 +65545 +PREHOOK: query: explain +select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k_n14 limit 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +POSTHOOK: query: explain +select distinct 
last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k_n14 limit 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k_n14 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: si (type: smallint), i (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: si (type: smallint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: t (type: tinyint) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col1: smallint, _col2: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: last_value_window_0 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: first_value_window_1 + arguments: _col0 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: last_value_window_0 (type: int), 
first_value_window_1 (type: tinyint) + outputColumnNames: last_value_window_0, first_value_window_1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: tinyint) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 50 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 50 + Processor Tree: + ListSink + +PREHOOK: query: select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from 
over10k_n14 limit 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i ), + first_value(t) over ( partition by si order by i ) +from over10k_n14 limit 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +65536 -2 +65536 2 +65536 9 +65536 12 +65536 13 +65536 18 +65536 22 +65536 23 +65536 27 +65536 37 +65536 39 +65536 42 +65536 48 +65536 55 +65536 56 +65536 58 +65536 61 +65536 69 +65536 71 +65536 73 +65536 75 +65536 78 +65536 80 +65536 83 +65536 84 +65536 88 +65536 94 +65536 104 +65536 107 +65536 108 +65536 111 +65536 114 +65536 118 +65536 119 +65536 121 +65537 4 +65537 8 +65537 9 +65537 11 +65537 18 +65537 22 +65537 25 +65537 36 +65537 51 +65537 53 +65537 54 +65537 55 +65537 56 +65537 57 +65537 59 +PREHOOK: query: explain +select si, max(f) mf, rank() over ( partition by si order by mf ) +FROM over10k_n14 +GROUP BY si +HAVING max(f) > 0 +limit 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +POSTHOOK: query: explain +select si, max(f) mf, rank() over ( partition by si order by mf ) +FROM over10k_n14 +GROUP BY si +HAVING max(f) > 0 +limit 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k_n14 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), f (type: float) + outputColumnNames: si, f + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(f) + keys: si (type: smallint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: float) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 0.0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: float) + null sort order: az + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: float) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: smallint, _col1: float + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + 
alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: float), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 50 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 50 + Processor Tree: + ListSink + +PREHOOK: query: select si, max(f) mf, rank() over ( partition by si order by mf ) +FROM over10k_n14 +GROUP BY si +HAVING max(f) > 0 +limit 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +POSTHOOK: query: select si, max(f) mf, rank() over ( partition by si order by mf ) +FROM over10k_n14 +GROUP BY si +HAVING max(f) > 0 +limit 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +256 94.87 1 +257 98.0 1 +258 98.19 1 +259 99.71 1 +260 99.78 1 +261 98.09 1 +262 98.41 1 +263 97.32 1 +264 97.65 1 +265 96.18 1 +266 99.41 1 +267 99.8 1 +268 93.34 1 +269 99.24 1 +270 98.57 1 +271 96.02 1 +272 92.82 1 +273 97.51 1 +274 95.43 1 +275 99.68 1 +276 98.94 1 +277 97.26 1 +278 98.56 1 +279 98.09 1 +280 99.21 1 +281 99.32 1 +282 95.49 1 +283 96.46 1 +284 99.34 1 +285 99.34 1 +286 92.77 1 +287 99.29 1 +288 96.71 1 +289 97.13 1 +290 99.88 1 +291 99.18 1 +292 94.99 1 +293 95.36 1 +294 99.34 1 +295 90.67 1 +296 
96.85 1 +297 95.62 1 +298 99.98 1 +299 99.36 1 +300 98.76 1 +301 99.08 1 +302 99.84 1 +303 98.57 1 +304 94.68 1 +305 96.1 1 +PREHOOK: query: explain +select distinct si, rank() over ( partition by si order by i ) +FROM over10k_n14 +limit 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +POSTHOOK: query: explain +select distinct si, rank() over ( partition by si order by i ) +FROM over10k_n14 +limit 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k_n14 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: si (type: smallint), i (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: si (type: smallint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: rank_window_0 (type: int), _col1 (type: smallint) + outputColumnNames: rank_window_0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: smallint), rank_window_0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 50 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 50 + Processor Tree: + ListSink + +PREHOOK: query: select distinct si, rank() over ( partition by si order by i ) +FROM over10k_n14 +limit 50 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@over10k_n14 +#### A masked pattern was here #### +POSTHOOK: query: select distinct si, rank() over ( partition by si order by i ) +FROM over10k_n14 +limit 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k_n14 +#### A masked pattern was here #### +256 1 +256 2 +256 3 +256 4 +256 5 +256 6 +256 7 +256 8 +256 9 +256 10 +256 11 +256 13 +256 14 +256 15 +256 16 +256 17 +256 18 +256 19 +256 20 +256 21 +256 22 +256 23 +256 24 +256 25 +256 26 +256 27 +256 28 +256 29 +256 30 +256 32 +256 33 +256 34 +256 35 +256 37 +257 1 +257 2 +257 3 +257 4 +257 5 +257 6 +257 7 +257 8 +257 9 +257 10 +257 11 +257 12 +257 13 +257 14 +257 16 +257 17 diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_insert.q.out b/ql/src/test/results/clientpositive/dynamic_partition_insert.q.out similarity index 100% rename from ql/src/test/results/clientpositive/llap/dynamic_partition_insert.q.out rename to ql/src/test/results/clientpositive/dynamic_partition_insert.q.out index 8965dab21f..2fab374433 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_insert.q.out +++ b/ql/src/test/results/clientpositive/dynamic_partition_insert.q.out @@ -665,60 +665,60 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### val_0 -val_11 -val_15 -val_19 -val_26 -val_27 -val_33 -val_35 -val_41 -val_43 -val_5 -val_54 -val_58 -val_64 -val_67 -val_69 -val_70 -val_77 -val_78 -val_8 -val_83 -val_84 -val_85 -val_9 -val_90 -val_92 -val_96 val_10 +val_11 val_12 +val_15 val_17 val_18 +val_19 val_2 val_20 val_24 +val_26 +val_27 val_28 val_30 +val_33 val_34 +val_35 val_37 val_4 +val_41 val_42 +val_43 val_44 val_47 +val_5 val_51 val_53 +val_54 val_57 +val_58 +val_64 val_65 val_66 +val_67 +val_69 +val_70 val_72 val_74 val_76 +val_77 +val_78 +val_8 val_80 val_82 +val_83 +val_84 +val_85 val_86 val_87 +val_9 +val_90 +val_92 val_95 +val_96 val_97 val_98 PREHOOK: query: SHOW PARTITIONS dest1_n143 @@ -793,50 +793,42 @@ POSTHOOK: type: QUERY POSTHOOK: 
Input: default@src #### A masked pattern was here #### val_100 +val_103 val_104 +val_105 val_111 val_113 -val_116 -val_119 -val_125 -val_133 -val_136 -val_137 -val_138 -val_143 -val_153 -val_158 -val_162 -val_163 -val_165 -val_175 -val_177 -val_181 -val_183 -val_187 -val_192 -val_194 -val_196 -val_103 -val_105 val_114 +val_116 val_118 +val_119 val_120 +val_125 val_126 val_128 val_129 val_131 +val_133 val_134 +val_136 +val_137 +val_138 +val_143 val_145 val_146 val_149 val_150 val_152 +val_153 val_155 val_156 val_157 +val_158 val_160 +val_162 +val_163 val_164 +val_165 val_166 val_167 val_168 @@ -844,16 +836,24 @@ val_169 val_170 val_172 val_174 +val_175 val_176 +val_177 val_178 val_179 val_180 +val_181 +val_183 val_186 +val_187 val_189 val_190 val_191 +val_192 val_193 +val_194 val_195 +val_196 val_197 val_199 PREHOOK: query: SHOW PARTITIONS dest2_n37 @@ -1100,8 +1100,8 @@ John Doe 23 USA CA NULL NULL USA CA Jane Doe 22 USA TX NULL NULL USA TX -John Doe 23 USA __HIVE_DEFAULT_PARTITION__ Jane Doe 22 USA __HIVE_DEFAULT_PARTITION__ +John Doe 23 USA __HIVE_DEFAULT_PARTITION__ PREHOOK: query: INSERT INTO TABLE table2_n10( name, country) SELECT name, country FROM table1_n15 PREHOOK: type: QUERY PREHOOK: Input: default@table1_n15 @@ -1143,10 +1143,10 @@ John Doe 23 USA CA NULL NULL USA CA Jane Doe 22 USA TX NULL NULL USA TX -John Doe 23 USA __HIVE_DEFAULT_PARTITION__ Jane Doe 22 USA __HIVE_DEFAULT_PARTITION__ -John Doe NULL USA __HIVE_DEFAULT_PARTITION__ +John Doe 23 USA __HIVE_DEFAULT_PARTITION__ Jane Doe NULL USA __HIVE_DEFAULT_PARTITION__ +John Doe NULL USA __HIVE_DEFAULT_PARTITION__ PREHOOK: query: DROP TABLE table2_n10 PREHOOK: type: DROPTABLE PREHOOK: Input: default@table2_n10 diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_skip_default.q.out b/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out similarity index 96% rename from ql/src/test/results/clientpositive/llap/dynamic_partition_skip_default.q.out rename to 
ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out index a3ec3997d8..cd8e9c0553 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_skip_default.q.out +++ b/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out @@ -114,10 +114,12 @@ STAGE PLANS: TableScan alias: dynamic_part_table filterExpr: ((partcol1 = '1') and (partcol2 = '1')) (type: boolean) + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: intcol (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select intcol from dynamic_part_table where partcol1='1' and partcol2='1' @@ -192,10 +194,12 @@ STAGE PLANS: TableScan alias: dynamic_part_table filterExpr: ((partcol1 = '1') and (partcol2 = '1')) (type: boolean) + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: intcol (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select intcol from dynamic_part_table where (partcol1='1' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__') @@ -319,9 +323,11 @@ STAGE PLANS: TableScan alias: dynamic_part_table filterExpr: ((partcol2) IN ('1', '__HIVE_DEFAULT_PARTITION__') and (partcol1 = '1')) (type: boolean) + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: intcol (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/llap/dynpart_merge.q.out b/ql/src/test/results/clientpositive/dynpart_merge.q.out similarity index 100% rename from 
ql/src/test/results/clientpositive/llap/dynpart_merge.q.out rename to ql/src/test/results/clientpositive/dynpart_merge.q.out index b25f3b239f..1c6f556052 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_merge.q.out +++ b/ql/src/test/results/clientpositive/dynpart_merge.q.out @@ -80,8 +80,8 @@ POSTHOOK: Input: default@tdp@dataint=20150316/hour=16/req=reqA POSTHOOK: Input: default@tdp@dataint=20150316/hour=16/req=reqB POSTHOOK: Input: default@tdp@dataint=20150316/hour=16/req=reqD #### A masked pattern was here #### -clusterIdA cacheId1 20150316 16 reqA clusterIdA cacheId1 20150316 16 reqB +clusterIdA cacheId1 20150316 16 reqA clusterIdB cacheId2 20150316 16 reqB clusterIdC cacheId3 20150316 16 reqA clusterIdD cacheId4 20150316 16 reqD diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out new file mode 100644 index 0000000000..7811f7cbd6 --- /dev/null +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out @@ -0,0 +1,90 @@ +PREHOOK: query: CREATE TABLE non_acid(key string, value string) +PARTITIONED BY(ds string, hr int) +CLUSTERED BY(key) INTO 2 BUCKETS +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@non_acid +POSTHOOK: query: CREATE TABLE non_acid(key string, value string) +PARTITIONED BY(ds string, hr int) +CLUSTERED BY(key) INTO 2 BUCKETS +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@non_acid +PREHOOK: query: explain +insert into table non_acid partition(ds,hr) select * from srcpart sort by value +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@non_acid +POSTHOOK: query: 
explain +insert into table non_acid partition(ds,hr) select * from srcpart sort by value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string), UDFToInteger(UDFToInteger(_col3)) (type: int), _bucket_number (type: string), _col1 (type: string) + null sort order: aaaa + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), UDFToInteger(UDFToInteger(_col3)) (type: int) + Statistics: Num rows: 2000 Data size: 732000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._bucket_number (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _bucket_number + File Output Operator + compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED + Statistics: Num rows: 2000 Data size: 732000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.non_acid + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.non_acid + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.non_acid + diff --git a/ql/src/test/results/clientpositive/escape_clusterby1.q.out b/ql/src/test/results/clientpositive/escape_clusterby1.q.out new file mode 100644 index 0000000000..84cf1e5d78 --- /dev/null +++ b/ql/src/test/results/clientpositive/escape_clusterby1.q.out @@ -0,0 +1,102 @@ +PREHOOK: query: explain +select key, value from src cluster by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select key, value from src cluster by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: 
string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select `key`, value from src cluster by `key`, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select `key`, value from src cluster by `key`, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: 
false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/escape_distributeby1.q.out b/ql/src/test/results/clientpositive/escape_distributeby1.q.out new file mode 100644 index 0000000000..ef9aec337c --- /dev/null +++ b/ql/src/test/results/clientpositive/escape_distributeby1.q.out @@ -0,0 +1,102 @@ +PREHOOK: query: explain +select key, value from src distribute by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select key, value from src distribute by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select `key`, value from src distribute by `key`, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select `key`, value from src distribute by `key`, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/escape_orderby1.q.out b/ql/src/test/results/clientpositive/escape_orderby1.q.out new file mode 100644 index 0000000000..5a4e6f175a --- /dev/null +++ b/ql/src/test/results/clientpositive/escape_orderby1.q.out @@ -0,0 +1,100 @@ +PREHOOK: query: explain +select key, value from src order by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select key, value from src order by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select `key`, value from src order by `key`, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select `key`, value from src order by `key`, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git 
a/ql/src/test/results/clientpositive/escape_sortby1.q.out b/ql/src/test/results/clientpositive/escape_sortby1.q.out new file mode 100644 index 0000000000..182e1e72ba --- /dev/null +++ b/ql/src/test/results/clientpositive/escape_sortby1.q.out @@ -0,0 +1,100 @@ +PREHOOK: query: explain +select key, value from src sort by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select key, value from src sort by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select 
`key`, value from src sort by `key`, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select `key`, value from src sort by `key`, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/except_all.q.out b/ql/src/test/results/clientpositive/except_all.q.out new file mode 100644 index 0000000000..5d1dc2211a --- /dev/null +++ b/ql/src/test/results/clientpositive/except_all.q.out @@ -0,0 +1,1047 @@ +PREHOOK: query: create table a_n15(key int) +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a_n15 +POSTHOOK: query: create table a_n15(key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a_n15 +PREHOOK: query: insert into table a_n15 values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@a_n15 +POSTHOOK: query: insert into table a_n15 values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@a_n15 +POSTHOOK: Lineage: a_n15.key SCRIPT [] +PREHOOK: query: create table b_n11(key bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b_n11 +POSTHOOK: query: create table b_n11(key bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b_n11 +PREHOOK: query: insert into table b_n11 values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@b_n11 +POSTHOOK: query: insert into table b_n11 values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@b_n11 +POSTHOOK: Lineage: b_n11.key SCRIPT [] +PREHOOK: query: select * from a_n15 except all select * from b_n11 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n15 +PREHOOK: Input: default@b_n11 +#### A masked pattern was here #### +POSTHOOK: query: select * from a_n15 except all select * from b_n11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n15 +POSTHOOK: Input: default@b_n11 +#### A masked pattern was here #### +0 +2 +2 +PREHOOK: query: drop table a_n15 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@a_n15 +PREHOOK: Output: default@a_n15 +POSTHOOK: query: drop table a_n15 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@a_n15 +POSTHOOK: Output: 
default@a_n15 +PREHOOK: query: drop table b_n11 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@b_n11 +PREHOOK: Output: default@b_n11 +POSTHOOK: query: drop table b_n11 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@b_n11 +POSTHOOK: Output: default@b_n11 +PREHOOK: query: create table a_n15(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a_n15 +POSTHOOK: query: create table a_n15(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a_n15 +PREHOOK: query: insert into table a_n15 values (1,2),(1,2),(1,3),(2,3),(2,2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@a_n15 +POSTHOOK: query: insert into table a_n15 values (1,2),(1,2),(1,3),(2,3),(2,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@a_n15 +POSTHOOK: Lineage: a_n15.key SCRIPT [] +POSTHOOK: Lineage: a_n15.value SCRIPT [] +PREHOOK: query: create table b_n11(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b_n11 +POSTHOOK: query: create table b_n11(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b_n11 +PREHOOK: query: insert into table b_n11 values (1,2),(2,3),(2,2),(2,2),(2,20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@b_n11 +POSTHOOK: query: insert into table b_n11 values (1,2),(2,3),(2,2),(2,2),(2,20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@b_n11 +POSTHOOK: Lineage: b_n11.key SCRIPT [] +POSTHOOK: Lineage: b_n11.value SCRIPT [] +PREHOOK: query: select * from a_n15 except all select * from b_n11 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n15 +PREHOOK: Input: default@b_n11 +#### A masked pattern was here #### +POSTHOOK: query: select * from a_n15 except all 
select * from b_n11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n15 +POSTHOOK: Input: default@b_n11 +#### A masked pattern was here #### +1 2 +1 3 +PREHOOK: query: select * from b_n11 except all select * from a_n15 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n15 +PREHOOK: Input: default@b_n11 +#### A masked pattern was here #### +POSTHOOK: query: select * from b_n11 except all select * from a_n15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n15 +POSTHOOK: Input: default@b_n11 +#### A masked pattern was here #### +2 2 +2 20 +PREHOOK: query: select * from b_n11 except all select * from a_n15 intersect distinct select * from b_n11 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n15 +PREHOOK: Input: default@b_n11 +#### A masked pattern was here #### +POSTHOOK: query: select * from b_n11 except all select * from a_n15 intersect distinct select * from b_n11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n15 +POSTHOOK: Input: default@b_n11 +#### A masked pattern was here #### +2 2 +2 20 +PREHOOK: query: select * from b_n11 except all select * from a_n15 except distinct select * from b_n11 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n15 +PREHOOK: Input: default@b_n11 +#### A masked pattern was here #### +POSTHOOK: query: select * from b_n11 except all select * from a_n15 except distinct select * from b_n11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n15 +POSTHOOK: Input: default@b_n11 +#### A masked pattern was here #### +PREHOOK: query: select * from a_n15 except all select * from b_n11 union all select * from a_n15 except distinct select * from b_n11 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n15 +PREHOOK: Input: default@b_n11 +#### A masked pattern was here #### +POSTHOOK: query: select * from a_n15 except all select * from b_n11 union all select * from a_n15 except distinct select * from b_n11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n15 +POSTHOOK: Input: default@b_n11 +#### A masked pattern was here #### +1 3 
+PREHOOK: query: select * from a_n15 except all select * from b_n11 union select * from a_n15 except distinct select * from b_n11 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n15 +PREHOOK: Input: default@b_n11 +#### A masked pattern was here #### +POSTHOOK: query: select * from a_n15 except all select * from b_n11 union select * from a_n15 except distinct select * from b_n11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n15 +POSTHOOK: Input: default@b_n11 +#### A masked pattern was here #### +1 3 +PREHOOK: query: select * from a_n15 except all select * from b_n11 except distinct select * from a_n15 except distinct select * from b_n11 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n15 +PREHOOK: Input: default@b_n11 +#### A masked pattern was here #### +POSTHOOK: query: select * from a_n15 except all select * from b_n11 except distinct select * from a_n15 except distinct select * from b_n11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n15 +POSTHOOK: Input: default@b_n11 +#### A masked pattern was here #### +PREHOOK: query: select * from (select a_n15.key, b_n11.value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub1 +except all +select * from (select a_n15.key, b_n11.value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub2 +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n15 +PREHOOK: Input: default@b_n11 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a_n15.key, b_n11.value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub1 +except all +select * from (select a_n15.key, b_n11.value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n15 +POSTHOOK: Input: default@b_n11 +#### A masked pattern was here #### +PREHOOK: query: select * from (select a_n15.key, b_n11.value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub1 +except all +select * from (select b_n11.value as key, a_n15.key as value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub2 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@a_n15 +PREHOOK: Input: default@b_n11 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a_n15.key, b_n11.value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub1 +except all +select * from (select b_n11.value as key, a_n15.key as value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n15 +POSTHOOK: Input: default@b_n11 +#### A masked pattern was here #### +1 2 +1 2 +1 2 +2 3 +2 3 +2 20 +2 20 +PREHOOK: query: explain select * from src except all select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select * from src except all select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By 
Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 2L (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col3), sum(_col2) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + TableScan + Union + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: 
string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col3), sum(_col2) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ((2L * _col2) - (3L * _col3)) (type: bigint), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + function name: UDTFReplicateRows + Select Operator + expressions: col1 (type: string), col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 1L (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
+PREHOOK: query: select * from src except all select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src except all select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain select * from src except all select * from src except distinct select * from src except distinct select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select * from src except all select * from src except distinct select * from src except distinct select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3, Stage-7 + Stage-5 depends on stages: Stage-4, Stage-8 + Stage-6 is a root stage + Stage-7 is a root stage + Stage-8 is a root stage + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic 
stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 2L (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col3), sum(_col2) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + 
TableScan + Union + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col3), sum(_col2) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ((2L * _col2) - (3L * _col3)) (type: bigint), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + function name: UDTFReplicateRows + Select Operator + expressions: col1 (type: string), col2 (type: string) + outputColumnNames: col1, col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: col1 (type: string), 
col2 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 2L (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 251 Data size: 48516 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 
+ Statistics: Num rows: 251 Data size: 48516 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 24250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 125 Data size: 24250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + TableScan + Union + Statistics: Num rows: 251 Data size: 48516 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 251 Data size: 48516 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 24250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 125 Data size: 24250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num 
rows: 125 Data size: 24250 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col2 > 0L) and ((_col2 * 2L) = _col3)) (type: boolean) + Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 2L (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 270 Data size: 52380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 270 Data size: 52380 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 135 Data size: 26190 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: 
string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 135 Data size: 26190 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + TableScan + Union + Statistics: Num rows: 270 Data size: 52380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 270 Data size: 52380 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 135 Data size: 26190 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 135 Data size: 26190 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 135 Data size: 26190 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col2 > 0L) and ((_col2 * 2L) = _col3)) (type: boolean) + Statistics: Num rows: 22 Data size: 4268 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE + File 
Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 1L (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 1L (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + 
alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 1L (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src except all select * from src except distinct select * from src except distinct select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A 
masked pattern was here #### +POSTHOOK: query: select * from src except all select * from src except distinct select * from src except distinct select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain select value from a_n15 group by value except distinct select key from b_n11 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n15 +PREHOOK: Input: default@b_n11 +#### A masked pattern was here #### +POSTHOOK: query: explain select value from a_n15 group by value except distinct select key from b_n11 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n15 +POSTHOOK: Input: default@b_n11 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a_n15 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: int) + outputColumnNames: value + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: value (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: complete + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), 2L (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1), sum(_col2) + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + TableScan + Union + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1), sum(_col2) + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col1 > 0L) and ((_col1 * 2L) = _col2)) (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: b_n11 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce 
partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), 1L (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select value from a_n15 group by value except distinct select key from b_n11 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@a_n15 +PREHOOK: Input: default@b_n11 +#### A masked pattern was here #### +POSTHOOK: query: select value from a_n15 group by value except distinct select key from b_n11 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_n15 +POSTHOOK: Input: default@b_n11 +#### A masked pattern was here #### +3 diff --git a/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out b/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out new file mode 100644 index 0000000000..736d59a9b6 --- /dev/null +++ b/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out @@ -0,0 +1,80 @@ +PREHOOK: query: create table t_n25 as select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: 
default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@t_n25 +POSTHOOK: query: create table t_n25 as select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_n25 +POSTHOOK: Lineage: t_n25.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t_n25.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain analyze table t_n25 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@t_n25 +PREHOOK: Output: default@t_n25 +#### A masked pattern was here #### +POSTHOOK: query: explain analyze table t_n25 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@t_n25 +POSTHOOK: Output: default@t_n25 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: t_n25 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t_n25 + +PREHOOK: query: analyze table t_n25 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@t_n25 +PREHOOK: Output: default@t_n25 +#### A masked pattern was here #### +POSTHOOK: query: analyze table t_n25 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@t_n25 +POSTHOOK: Output: default@t_n25 +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/explain_ddl.q.out b/ql/src/test/results/clientpositive/explain_ddl.q.out new file mode 100644 index 0000000000..aea46d304f --- /dev/null +++ b/ql/src/test/results/clientpositive/explain_ddl.q.out @@ -0,0 +1,813 @@ +PREHOOK: query: CREATE VIEW V1_n0 AS SELECT key, value from src +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@V1_n0 +POSTHOOK: query: CREATE VIEW V1_n0 AS SELECT key, value from src +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@V1_n0 +POSTHOOK: Lineage: V1_n0.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: V1_n0.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from V1_n0 where key > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@v1_n0 +#### A masked pattern was here 
#### +POSTHOOK: query: select count(*) from V1_n0 where key > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@v1_n0 +#### A masked pattern was here #### +497 +PREHOOK: query: CREATE TABLE M1 AS SELECT key, value from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@M1 +POSTHOOK: query: CREATE TABLE M1 AS SELECT key, value from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@M1 +POSTHOOK: Lineage: m1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: m1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from M1 where key > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@m1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from M1 where key > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@m1 +#### A masked pattern was here #### +497 +PREHOOK: query: EXPLAIN CREATE TABLE M1 AS select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@M1 +POSTHOOK: query: EXPLAIN CREATE TABLE M1 AS select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@M1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-8 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-8 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + 
expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.M1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: col1, col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true 
+#### A masked pattern was here #### + + Stage: Stage-8 + Create Table + columns: key string, value string + name: default.M1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.M1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.M1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.M1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: EXPLAIN CREATE TABLE M1 AS select * from M1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@m1 +PREHOOK: Output: database:default +PREHOOK: Output: default@M1 +POSTHOOK: query: EXPLAIN CREATE TABLE M1 AS select * from M1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@m1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@M1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-8 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-8 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator 
Tree: + TableScan + alias: m1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.M1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: col1, col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move 
Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-8 + Create Table + columns: key string, value string + name: default.M1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.M1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.M1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.M1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: EXPLAIN CREATE TABLE M1 AS select * from V1_n0 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Input: default@v1_n0 +PREHOOK: Output: database:default +PREHOOK: Output: default@M1 +POSTHOOK: query: EXPLAIN CREATE TABLE M1 AS select * from V1_n0 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Input: default@v1_n0 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@M1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: 
Stage-4, Stage-3, Stage-6 + Stage-8 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-8 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + properties: + insideView TRUE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.M1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: col1, col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-8 + Create Table + columns: key string, value string + name: default.M1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.M1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.M1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.M1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: EXPLAIN CREATE TABLE V1_n0 AS select * from M1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@m1 +PREHOOK: Output: database:default +PREHOOK: Output: default@V1_n0 +POSTHOOK: query: EXPLAIN CREATE TABLE V1_n0 AS select * from M1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@m1 +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@V1_n0 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-8 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-8 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: m1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.V1_n0 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: col1, col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: 
COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-8 + Create Table + columns: key string, value string + name: default.V1_n0 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.V1_n0 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.V1_n0 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.V1_n0 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: EXPLAIN CREATE VIEW V1_n0 AS select * from M1 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@m1 +PREHOOK: Output: database:default +PREHOOK: Output: default@V1_n0 +POSTHOOK: 
query: EXPLAIN CREATE VIEW V1_n0 AS select * from M1 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@m1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@V1_n0 +STAGE DEPENDENCIES: + Stage-1 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Create View + columns: key string, value string + expanded text: select `m1`.`key`, `m1`.`value` from `default`.`M1` + name: default.V1_n0 + original text: select * from M1 + +PREHOOK: query: EXPLAIN CREATE TABLE M1 LIKE src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@M1 +POSTHOOK: query: EXPLAIN CREATE TABLE M1 LIKE src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@M1 +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Create Table + default input format: org.apache.hadoop.mapred.TextInputFormat + default output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + default serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + like: src + name: default.M1 + table properties: + bucketing_version 2 + +PREHOOK: query: EXPLAIN CREATE TABLE M1 LIKE M1 +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@M1 +POSTHOOK: query: EXPLAIN CREATE TABLE M1 LIKE M1 +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@M1 +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Create Table + default input format: org.apache.hadoop.mapred.TextInputFormat + default output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + default serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + like: M1 + name: default.M1 + table properties: + bucketing_version 2 + +PREHOOK: query: EXPLAIN DROP TABLE M1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@m1 +PREHOOK: Output: default@m1 +POSTHOOK: query: EXPLAIN DROP TABLE M1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: 
default@m1 +POSTHOOK: Output: default@m1 +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Drop Table + table: M1 + +PREHOOK: query: select count(*) from M1 where key > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@m1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from M1 where key > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@m1 +#### A masked pattern was here #### +497 +PREHOOK: query: EXPLAIN INSERT INTO M1 SELECT * FROM M1 +PREHOOK: type: QUERY +PREHOOK: Input: default@m1 +PREHOOK: Output: default@m1 +POSTHOOK: query: EXPLAIN INSERT INTO M1 SELECT * FROM M1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@m1 +POSTHOOK: Output: default@m1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: m1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.m1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.m1 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.m1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.m1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: 
false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.m1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: select count(*) from M1 where key > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@m1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from M1 where key > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@m1 +#### A masked pattern was here #### +497 +PREHOOK: query: EXPLAIN TRUNCATE TABLE M1 +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@m1 +POSTHOOK: query: EXPLAIN TRUNCATE TABLE M1 +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@m1 +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Truncate Table or Partition + table name: default.m1 + +PREHOOK: query: select count(*) from M1 where key > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@m1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from M1 where key > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@m1 +#### A masked pattern was here #### +497 diff --git a/ql/src/test/results/clientpositive/llap/explain_logical.q.out b/ql/src/test/results/clientpositive/explain_logical.q.out similarity index 70% rename from ql/src/test/results/clientpositive/llap/explain_logical.q.out rename to ql/src/test/results/clientpositive/explain_logical.q.out index 508a72354c..56c47d6025 100644 --- a/ql/src/test/results/clientpositive/llap/explain_logical.q.out +++ b/ql/src/test/results/clientpositive/explain_logical.q.out @@ -81,28 +81,34 @@ srcpart TableScan (TS_0) alias: srcpart filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_2) expressions: key (type: string) outputColumnNames: key + 
Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator (GBY_3) aggregations: count() keys: key (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_4) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Group By Operator (GBY_5) aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator (FS_7) compressed: false + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -122,28 +128,34 @@ LOGICAL PLAN: src TableScan (TS_0) alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_1) expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator (GBY_2) aggregations: count() keys: key (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_3) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Group By Operator (GBY_4) aggregations: 
count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator (FS_6) compressed: false + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -169,12 +181,16 @@ LOGICAL PLAN: $hdt$_0-subquery1:src TableScan (TS_0) alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_1) expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Union (UNION_5) + Statistics: Num rows: 2500 Data size: 445000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator (FS_7) compressed: false + Statistics: Num rows: 2500 Data size: 445000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -183,10 +199,13 @@ $hdt$_0-subquery2:srcpart TableScan (TS_2) alias: srcpart filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_4) expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Union (UNION_5) + Statistics: Num rows: 2500 Data size: 445000 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: EXPLAIN LOGICAL SELECT S1.key, S2.value FROM src S1 JOIN srcpart S2 ON S1.key = S2.key WHERE ds IS NOT NULL @@ -204,17 +223,21 @@ LOGICAL PLAN: $hdt$_0:s1 TableScan (TS_0) alias: s1 - filterExpr: (key is not null and (key) 
IN (RS[7])) (type: boolean) - Filter Operator (FIL_13) - predicate: (key is not null and (key) IN (RS[7])) (type: boolean) + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator (FIL_11) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_2) expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_6) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Join Operator (JOIN_8) condition map: Inner Join 0 to 1 @@ -222,11 +245,14 @@ $hdt$_0:s1 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 + Statistics: Num rows: 3164 Data size: 563192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_9) expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3164 Data size: 563192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator (FS_10) compressed: false + Statistics: Num rows: 3164 Data size: 563192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -234,17 +260,21 @@ $hdt$_0:s1 $hdt$_1:s2 TableScan (TS_3) alias: s2 - filterExpr: (key is not null and (key) IN (RS[6])) (type: boolean) - Filter Operator (FIL_14) - predicate: (key is not null and (key) IN (RS[6])) (type: boolean) + filterExpr: (key is not null and ds is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator (FIL_12) 
+ predicate: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_5) expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_7) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Join Operator (JOIN_8) condition map: @@ -253,6 +283,7 @@ $hdt$_1:s2 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 + Statistics: Num rows: 3164 Data size: 563192 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: EXPLAIN LOGICAL SELECT * FROM V1_n8 PREHOOK: type: QUERY @@ -270,9 +301,11 @@ src alias: src properties: insideView TRUE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_1) expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE ListSink (LIST_SINK_3) PREHOOK: query: EXPLAIN LOGICAL SELECT * FROM V2_n3 @@ -300,9 +333,11 @@ srcpart filterExpr: ds is not null (type: boolean) properties: insideView TRUE + Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_2) expressions: ds (type: string), key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 724000 Basic stats: COMPLETE Column stats: COMPLETE ListSink (LIST_SINK_5) PREHOOK: query: EXPLAIN LOGICAL SELECT * FROM V3_n1 @@ -323,19 +358,23 @@ LOGICAL PLAN: $hdt$_0:srcpart TableScan (TS_0) alias: srcpart - filterExpr: (key is not null and (key) IN (RS[7])) 
(type: boolean) + filterExpr: (ds is not null and key is not null) (type: boolean) properties: insideView TRUE - Filter Operator (FIL_13) - predicate: (key is not null and (key) IN (RS[7])) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator (FIL_11) + predicate: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_2) expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_6) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Join Operator (JOIN_8) condition map: Inner Join 0 to 1 @@ -343,11 +382,14 @@ $hdt$_0:srcpart 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 + Statistics: Num rows: 3164 Data size: 563192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_9) expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3164 Data size: 563192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator (FS_10) compressed: false + Statistics: Num rows: 3164 Data size: 563192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -355,19 +397,23 @@ $hdt$_0:srcpart $hdt$_1:src2 TableScan (TS_3) alias: src2 - filterExpr: (key is not null and (key) IN (RS[6])) (type: boolean) + filterExpr: key is not null (type: boolean) properties: insideView TRUE - Filter Operator (FIL_14) - predicate: (key is not null and (key) IN (RS[6])) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE + Filter Operator (FIL_12) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_5) expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_7) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Join Operator (JOIN_8) condition map: @@ -376,6 +422,7 @@ $hdt$_1:src2 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 + Statistics: Num rows: 3164 Data size: 563192 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: EXPLAIN LOGICAL SELECT * FROM V4_n1 PREHOOK: type: QUERY @@ -397,19 +444,23 @@ LOGICAL PLAN: $hdt$_0:srcpart TableScan (TS_0) alias: srcpart - filterExpr: (key is not null and (key) IN (RS[10]) and (key) IN (RS[13])) (type: boolean) + filterExpr: (ds is not null and key is not null) (type: boolean) properties: insideView TRUE - Filter Operator (FIL_22) - predicate: (key is not null and (key) IN (RS[10]) and (key) IN (RS[13])) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator (FIL_17) + predicate: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_2) expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_9) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + 
Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Join Operator (JOIN_11) condition map: @@ -418,11 +469,13 @@ $hdt$_0:srcpart 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3164 Data size: 838460 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_12) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3164 Data size: 838460 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string) Join Operator (JOIN_14) condition map: @@ -431,11 +484,14 @@ $hdt$_0:srcpart 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 5006 Data size: 1346614 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_15) expressions: _col2 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5006 Data size: 1346614 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator (FS_16) compressed: false + Statistics: Num rows: 5006 Data size: 1346614 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -443,19 +499,23 @@ $hdt$_0:srcpart $hdt$_1:src TableScan (TS_3) alias: src - filterExpr: (key is not null and (key) IN (RS[9]) and (key) IN (RS[13])) (type: boolean) + filterExpr: key is not null (type: boolean) properties: insideView TRUE - Filter Operator (FIL_23) - predicate: (key is not null and (key) IN (RS[9]) and (key) IN (RS[13])) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator (FIL_18) + predicate: key is not null (type: boolean) + Statistics: 
Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_5) expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_10) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Join Operator (JOIN_11) condition map: Inner Join 0 to 1 @@ -463,22 +523,27 @@ $hdt$_1:src 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3164 Data size: 838460 Basic stats: COMPLETE Column stats: COMPLETE $hdt$_2:src3 TableScan (TS_6) alias: src3 - filterExpr: (key is not null and (key) IN (RS[12]) and (key) IN (RS[9]) and (key) IN (RS[10])) (type: boolean) + filterExpr: key is not null (type: boolean) properties: insideView TRUE - Filter Operator (FIL_24) - predicate: (key is not null and (key) IN (RS[12]) and (key) IN (RS[9]) and (key) IN (RS[10])) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator (FIL_19) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_8) expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_13) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Join Operator (JOIN_14) condition map: @@ -487,6 +552,7 @@ $hdt$_2:src3 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: 
_col1, _col2, _col4 + Statistics: Num rows: 5006 Data size: 1346614 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: CREATE VIEW V5_n0 as SELECT * FROM srcpart where ds = '10' PREHOOK: type: CREATEVIEW @@ -519,11 +585,14 @@ srcpart filterExpr: (ds = '10') (type: boolean) properties: insideView TRUE + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator (FIL_4) predicate: (ds = '10') (type: boolean) + Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_2) expressions: key (type: string), value (type: string), '10' (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE ListSink (LIST_SINK_5) PREHOOK: query: EXPLAIN LOGICAL SELECT s1.key, s1.cnt, s2.value FROM (SELECT key, count(value) as cnt FROM src GROUP BY key) s1 JOIN src s2 ON (s1.key = s2.key) ORDER BY s1.key @@ -538,31 +607,37 @@ LOGICAL PLAN: $hdt$_0:src TableScan (TS_0) alias: src - filterExpr: (key is not null and (key) IN (RS[11])) (type: boolean) - Filter Operator (FIL_19) - predicate: (key is not null and (key) IN (RS[11])) (type: boolean) + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator (FIL_17) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator (GBY_3) aggregations: count(value) keys: key (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_4) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE 
Column stats: COMPLETE value expressions: _col1 (type: bigint) Group By Operator (GBY_5) aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_10) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Join Operator (JOIN_12) condition map: @@ -571,19 +646,24 @@ $hdt$_0:src 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 395 Data size: 73470 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_13) expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 395 Data size: 73470 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_14) key expressions: _col0 (type: string) null sort order: z sort order: + + Statistics: Num rows: 395 Data size: 73470 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: string) Select Operator (SEL_15) expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 395 Data size: 73470 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator (FS_16) compressed: false + Statistics: Num rows: 395 Data size: 73470 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -591,17 +671,21 @@ $hdt$_0:src $hdt$_1:s2 TableScan (TS_7) alias: s2 - filterExpr: (key is not null and (key) IN (RS[10])) (type: boolean) - Filter 
Operator (FIL_20) - predicate: (key is not null and (key) IN (RS[10])) (type: boolean) + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator (FIL_18) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_9) expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator (RS_11) key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Join Operator (JOIN_12) condition map: @@ -610,4 +694,5 @@ $hdt$_1:s2 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 395 Data size: 73470 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/explaindenpendencydiffengs.q.out b/ql/src/test/results/clientpositive/explaindenpendencydiffengs.q.out similarity index 96% rename from ql/src/test/results/clientpositive/llap/explaindenpendencydiffengs.q.out rename to ql/src/test/results/clientpositive/explaindenpendencydiffengs.q.out index e6cee81c81..a31317bf99 100644 --- a/ql/src/test/results/clientpositive/llap/explaindenpendencydiffengs.q.out +++ b/ql/src/test/results/clientpositive/explaindenpendencydiffengs.q.out @@ -43,7 +43,7 @@ POSTHOOK: Input: default@dependtest POSTHOOK: Input: default@dependtest@b=1 POSTHOOK: Input: default@viewtest #### A masked pattern was here #### 
-{"input_tables":[{"tablename":"default@viewtest","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@dependtest","tabletype":"MANAGED_TABLE","tableParents":"[default@viewtest]"}],"input_partitions":[{"partitionName":"default@dependtest@b=1"}]} +{"input_tables":[{"tablename":"default@viewtest","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@dependtest","tabletype":"MANAGED_TABLE","tableParents":"[default@viewtest]"}],"input_partitions":[{"partitionName":"default@dependtest@b=1","partitionParents":"[default@viewtest]"}]} PREHOOK: query: drop view viewtest PREHOOK: type: DROPVIEW PREHOOK: Input: default@viewtest diff --git a/ql/src/test/results/clientpositive/llap/extract.q.out b/ql/src/test/results/clientpositive/extract.q.out similarity index 71% rename from ql/src/test/results/clientpositive/llap/extract.q.out rename to ql/src/test/results/clientpositive/extract.q.out index 9625b5b989..536dc4e2c9 100644 --- a/ql/src/test/results/clientpositive/llap/extract.q.out +++ b/ql/src/test/results/clientpositive/extract.q.out @@ -36,19 +36,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@extract_udf #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: extract_udf + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: day(t) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator 
limit: -1 Processor Tree: - TableScan - alias: extract_udf - Select Operator - expressions: day(t) (type: int) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: select day(t) from extract_udf @@ -74,19 +89,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@extract_udf #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: extract_udf + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: day(t) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: extract_udf - Select Operator - expressions: day(t) (type: int) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: select extract(day from t) from extract_udf diff --git a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_date.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out similarity index 85% rename from ql/src/test/results/clientpositive/llap/extrapolate_part_stats_date.q.out rename to ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out index 2eb4e4ba65..e6710d57b1 100644 --- a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_date.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out @@ -71,51 +71,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - 
Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: date_dim_n1 - filterExpr: ((d_date > DATE'1900-01-02') and (d_date_sk = 2416945L)) (type: boolean) - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (d_date > DATE'1900-01-02') (type: boolean) - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Map Reduce + Map Operator Tree: + TableScan + alias: date_dim_n1 + filterExpr: ((d_date > DATE'1900-01-02') and (d_date_sk = 2416945L)) (type: boolean) + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_date > DATE'1900-01-02') (type: boolean) + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -345,9 +336,11 @@ STAGE PLANS: Processor Tree: TableScan alias: date_dim_n1 + Statistics: Num rows: 4 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: d_date (type: date) outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_full.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out similarity index 98% rename from ql/src/test/results/clientpositive/llap/extrapolate_part_stats_full.q.out rename to ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out index 3a7d46ade0..97f31bfdb6 100644 --- a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_full.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out @@ -218,10 +218,12 @@ STAGE 
PLANS: Processor Tree: TableScan alias: loc_orc_1d + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_1d @@ -341,10 +343,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: create table if not exists loc_orc_2d ( @@ -653,10 +657,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_2d @@ -876,9 +882,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out similarity index 98% rename from ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial.q.out rename to ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index 
3e6024c6a9..36251dea7a 100644 --- a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -267,7 +267,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d_n1 { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 429 + totalSize 428 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -313,7 +313,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d_n1 { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 436 + totalSize 434 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -340,10 +340,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d_n1 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_1d_n1 @@ -486,7 +488,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d_n1 { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 429 + totalSize 428 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -532,7 +534,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d_n1 { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 436 + totalSize 434 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -559,10 +561,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d_n1 + Statistics: Num rows: 20 Data size: 1820 Basic stats: 
COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc_1d_n1 partition(year='2000') compute statistics for columns state @@ -733,7 +737,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d_n1 { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 429 + totalSize 428 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -779,7 +783,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d_n1 { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 436 + totalSize 434 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -806,10 +810,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d_n1 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_1d_n1 @@ -952,7 +958,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d_n1 { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 429 + totalSize 428 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -998,7 +1004,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d_n1 { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 436 + totalSize 434 #### A masked pattern was here #### serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1025,10 +1031,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d_n1 + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: create table if not exists loc_orc_2d_n1 ( @@ -1673,10 +1681,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d_n1 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_2d_n1 @@ -2239,9 +2249,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d_n1 + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/llap/fetch_aggregation.q.out b/ql/src/test/results/clientpositive/fetch_aggregation.q.out similarity index 50% rename from ql/src/test/results/clientpositive/llap/fetch_aggregation.q.out rename to ql/src/test/results/clientpositive/fetch_aggregation.q.out index 655028b6e5..8949995258 100644 --- a/ql/src/test/results/clientpositive/llap/fetch_aggregation.q.out +++ b/ql/src/test/results/clientpositive/fetch_aggregation.q.out @@ -14,33 +14,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data 
size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col0), sum(_col0), min(_col0), max(_col0), sum(_col2), sum(_col1) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: llap - LLAP IO: no inputs + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0), sum(_col0), min(_col0), max(_col0), sum(_col2), sum(_col1) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: 
Stage-0 Fetch Operator @@ -50,9 +44,11 @@ STAGE PLANS: aggregations: count(_col0), sum(_col1), min(_col2), max(_col3), sum(_col4), sum(_col5) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: double), (_col1 / _col0) (type: double), _col2 (type: string), _col3 (type: string), power(((_col4 - ((_col5 * _col5) / _col0)) / _col0), 0.5) (type: double), ((_col4 - ((_col5 * _col5) / _col0)) / _col0) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select count(key),sum(key),avg(key),min(key),max(key),std(key),variance(key) from src diff --git a/ql/src/test/results/clientpositive/filter_aggr.q.out b/ql/src/test/results/clientpositive/filter_aggr.q.out new file mode 100644 index 0000000000..e3fe160545 --- /dev/null +++ b/ql/src/test/results/clientpositive/filter_aggr.q.out @@ -0,0 +1,481 @@ +PREHOOK: query: explain extended +select key, c, m from +( +select key, c, 1 as m from (select key, count(key) as c from src group by key)s1 +union all +select key, c, 2 as m from (select key, count(key) as c from src group by key)s2 +)sub +where m = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain extended +select key, c, m from +( +select key, c, 1 as m from (select key, count(key) as c from src group by key)s1 +union all +select key, c, 2 as m from (select key, count(key) as c from src group by key)s2 +)sub +where m = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT `key`, COUNT(`key`) AS `c`, 1 AS `m` +FROM `default`.`src` +GROUP BY `key` +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE 
PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(key) + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
+ properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), 1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:bigint:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + 
MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, c, m from +( +select key, c, 1 as m from (select key, count(key) as c from src group by key)s1 +union all +select key, c, 2 as m from (select key, count(key) as c from src group by key)s2 +)sub +where m = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, c, m from +( +select key, c, 1 as m from (select key, count(key) as c from src group by key)s1 +union all +select key, c, 2 as m from (select key, count(key) as c from src group by key)s2 +)sub +where m = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 3 1 +10 1 1 +100 2 1 +103 2 1 +104 2 1 +105 1 1 +11 1 1 +111 1 1 +113 2 1 +114 1 1 +116 1 1 +118 2 1 +119 3 1 +12 2 1 +120 2 1 +125 2 1 +126 1 1 +128 3 1 +129 2 1 +131 1 1 +133 1 1 +134 2 1 +136 1 1 +137 2 1 +138 4 1 +143 1 1 +145 1 1 +146 2 1 +149 2 1 +15 2 1 +150 1 1 +152 2 1 +153 1 1 +155 1 1 +156 1 1 +157 1 1 +158 1 1 +160 1 1 +162 1 1 +163 1 1 +164 2 1 +165 2 1 +166 1 1 +167 3 1 +168 1 1 +169 4 1 +17 1 1 +170 1 1 +172 2 1 +174 2 1 +175 2 1 +176 2 1 +177 1 1 +178 1 1 +179 2 1 +18 2 1 +180 1 1 +181 1 1 +183 1 1 +186 1 1 +187 3 1 +189 1 1 +19 1 1 +190 1 1 +191 2 1 +192 1 1 +193 3 1 +194 1 1 +195 2 1 +196 1 1 +197 2 1 +199 3 1 +2 1 1 +20 1 1 +200 2 1 +201 1 1 +202 1 1 +203 2 1 +205 2 1 +207 2 1 +208 3 1 +209 2 1 +213 2 1 +214 1 1 +216 2 1 +217 2 1 +218 1 1 +219 2 1 +221 2 1 +222 1 1 +223 2 1 +224 2 1 +226 1 1 +228 1 1 +229 2 1 +230 5 1 +233 2 1 +235 1 1 +237 2 1 +238 2 1 +239 2 1 +24 2 1 +241 1 1 +242 2 1 +244 1 1 +247 1 1 +248 1 1 +249 1 1 +252 1 1 +255 2 1 +256 2 1 +257 1 1 +258 1 1 +26 2 1 +260 1 1 +262 1 1 +263 1 1 +265 2 1 +266 1 1 +27 1 1 +272 2 1 +273 3 1 +274 1 1 +275 1 1 +277 4 1 +278 2 1 +28 1 1 +280 2 1 +281 2 1 +282 2 1 +283 1 1 +284 1 1 +285 1 1 +286 1 1 +287 1 1 +288 2 1 +289 1 1 +291 1 1 +292 1 1 +296 1 1 
+298 3 1 +30 1 1 +302 1 1 +305 1 1 +306 1 1 +307 2 1 +308 1 1 +309 2 1 +310 1 1 +311 3 1 +315 1 1 +316 3 1 +317 2 1 +318 3 1 +321 2 1 +322 2 1 +323 1 1 +325 2 1 +327 3 1 +33 1 1 +331 2 1 +332 1 1 +333 2 1 +335 1 1 +336 1 1 +338 1 1 +339 1 1 +34 1 1 +341 1 1 +342 2 1 +344 2 1 +345 1 1 +348 5 1 +35 3 1 +351 1 1 +353 2 1 +356 1 1 +360 1 1 +362 1 1 +364 1 1 +365 1 1 +366 1 1 +367 2 1 +368 1 1 +369 3 1 +37 2 1 +373 1 1 +374 1 1 +375 1 1 +377 1 1 +378 1 1 +379 1 1 +382 2 1 +384 3 1 +386 1 1 +389 1 1 +392 1 1 +393 1 1 +394 1 1 +395 2 1 +396 3 1 +397 2 1 +399 2 1 +4 1 1 +400 1 1 +401 5 1 +402 1 1 +403 3 1 +404 2 1 +406 4 1 +407 1 1 +409 3 1 +41 1 1 +411 1 1 +413 2 1 +414 2 1 +417 3 1 +418 1 1 +419 1 1 +42 2 1 +421 1 1 +424 2 1 +427 1 1 +429 2 1 +43 1 1 +430 3 1 +431 3 1 +432 1 1 +435 1 1 +436 1 1 +437 1 1 +438 3 1 +439 2 1 +44 1 1 +443 1 1 +444 1 1 +446 1 1 +448 1 1 +449 1 1 +452 1 1 +453 1 1 +454 3 1 +455 1 1 +457 1 1 +458 2 1 +459 2 1 +460 1 1 +462 2 1 +463 2 1 +466 3 1 +467 1 1 +468 4 1 +469 5 1 +47 1 1 +470 1 1 +472 1 1 +475 1 1 +477 1 1 +478 2 1 +479 1 1 +480 3 1 +481 1 1 +482 1 1 +483 1 1 +484 1 1 +485 1 1 +487 1 1 +489 4 1 +490 1 1 +491 1 1 +492 2 1 +493 1 1 +494 1 1 +495 1 1 +496 1 1 +497 1 1 +498 3 1 +5 3 1 +51 2 1 +53 1 1 +54 1 1 +57 1 1 +58 2 1 +64 1 1 +65 1 1 +66 1 1 +67 2 1 +69 1 1 +70 3 1 +72 2 1 +74 1 1 +76 2 1 +77 1 1 +78 1 1 +8 1 1 +80 1 1 +82 1 1 +83 2 1 +84 2 1 +85 1 1 +86 1 1 +87 1 1 +9 1 1 +90 3 1 +92 1 1 +95 2 1 +96 1 1 +97 2 1 +98 2 1 diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out new file mode 100644 index 0000000000..74a7aa89e7 --- /dev/null +++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -0,0 +1,504 @@ +PREHOOK: query: EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value='2008-04-08' AND m.value='2008-04-08') OR (f.key = m.key 
AND f.value='2008-04-09') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value='2008-04-08' AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: m + filterExpr: ((value <> '') and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((value <> '') and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (value = '2008-04-08') (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + alias: f + filterExpr: ((value) IN ('2008-04-08', '2008-04-09') and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((value) IN ('2008-04-08', '2008-04-09') and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key 
(type: string), (value = '2008-04-08') (type: boolean), (value = '2008-04-09') (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean), _col2 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col4 and _col2) or _col5) (type: boolean) + Statistics: Num rows: 3 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 3 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: string) + TableScan + alias: g + filterExpr: (value <> '') (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value <> '') (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data 
size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col3, _col6 + Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value IN ('2008-04-08','2008-04-10') AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT f.key, g.value +FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') +WHERE (f.key = m.key AND f.value IN ('2008-04-08','2008-04-10') AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + 
Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: m + filterExpr: ((value <> '') and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((value <> '') and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (value = '2008-04-08') (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + alias: f + filterExpr: ((value) IN ('2008-04-08', '2008-04-10', '2008-04-09') and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((value) IN ('2008-04-08', '2008-04-10', '2008-04-09') and key is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), (value) IN ('2008-04-08', '2008-04-10') (type: boolean), (value = '2008-04-09') (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean), 
_col2 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 7 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col4 and _col2) or _col5) (type: boolean) + Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: string) + TableScan + alias: g + filterExpr: (value <> '') (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value <> '') (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col3, _col6 + Statistics: Num rows: 6 Data size: 
1068 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT t1.key +FROM cbo_t1 t1 +JOIN ( + SELECT t2.key + FROM cbo_t2 t2 + JOIN (SELECT * FROM cbo_t3 t3 WHERE c_int=1) t3 ON t2.key=t3.c_int + WHERE ((t2.key=t3.key) AND (t2.c_float + t3.c_float > 2)) OR + ((t2.key=t3.key) AND (t2.c_int + t3.c_int > 2))) t4 ON t1.key=t4.key +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t1 +PREHOOK: Input: default@cbo_t1@dt=2014 +PREHOOK: Input: default@cbo_t2 +PREHOOK: Input: default@cbo_t2@dt=2014 +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT t1.key +FROM cbo_t1 t1 +JOIN ( + SELECT t2.key + FROM cbo_t2 t2 + JOIN (SELECT * FROM cbo_t3 t3 WHERE c_int=1) t3 ON t2.key=t3.c_int + WHERE ((t2.key=t3.key) AND (t2.c_float + t3.c_float > 2)) OR + ((t2.key=t3.key) AND (t2.c_int + t3.c_int > 2))) t4 ON t1.key=t4.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t1 +POSTHOOK: Input: default@cbo_t1@dt=2014 +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Input: default@cbo_t2@dt=2014 +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (UDFToDouble(key) = 1.0D) 
(type: boolean) + Statistics: Num rows: 20 Data size: 1767 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) = 1.0D) (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), c_float (type: float), ((c_int + 1) > 2) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: float), _col2 (type: boolean) + TableScan + alias: t3 + filterExpr: ((c_int = 1) and (UDFToDouble(key) = 1.0D)) (type: boolean) + Statistics: Num rows: 20 Data size: 1767 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((c_int = 1) and (UDFToDouble(key) = 1.0D)) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), c_float (type: float) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: float) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 6 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((_col1 + _col4) > 2.0) or _col2) (type: boolean) 
+ Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + filterExpr: (UDFToDouble(key) = 1.0D) (type: boolean) + Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) = 1.0D) (type: boolean) + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 5370 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 5370 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT f.key, f.value, m.value +FROM src f JOIN src m ON(f.key = m.key AND m.value is not null AND m.value !='') +WHERE (f.value IN ('2008-04-08','2008-04-10') AND f.value IN ('2008-04-08','2008-04-09') AND m.value='2008-04-10') OR (m.value='2008-04-08') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT f.key, f.value, m.value +FROM src f JOIN src m ON(f.key = m.key AND m.value is not null AND m.value !='') +WHERE (f.value IN ('2008-04-08','2008-04-10') AND f.value IN ('2008-04-08','2008-04-09') AND m.value='2008-04-10') OR (m.value='2008-04-08') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (value) IN ('2008-04-08', '2008-04-10') (type: boolean), (value) IN ('2008-04-08', '2008-04-09') (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 93000 Basic 
stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean), _col3 (type: boolean) + TableScan + alias: m + filterExpr: ((value) IN ('2008-04-10', '2008-04-08') and (value <> '') and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((value) IN ('2008-04-10', '2008-04-08') and (value <> '') and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (value = '2008-04-10') (type: boolean), (value = '2008-04-08') (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean), _col3 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7 + Statistics: Num rows: 4 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col2 and _col3 and _col6) or _col7) (type: boolean) + Statistics: Num rows: 2 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 538 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 538 Basic stats: COMPLETE Column stats: COMPLETE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown2.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown2.q.out new file mode 100644 index 0000000000..07dec3c5fa --- /dev/null +++ b/ql/src/test/results/clientpositive/filter_cond_pushdown2.q.out @@ -0,0 +1,361 @@ +PREHOOK: query: drop table if exists users_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists users_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE users_table( + `field_1` int, + `field_2` string, + `field_3` boolean, + `field_4` boolean, + `field_5` boolean, + `field_6` boolean, + `field_7` boolean) +ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@users_table +POSTHOOK: query: CREATE TABLE users_table( + `field_1` int, + `field_2` string, + `field_3` boolean, + `field_4` boolean, + `field_5` boolean, + `field_6` boolean, + `field_7` boolean) +ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@users_table +PREHOOK: query: load data local inpath '../../data/files/small_csv.csv' into table users_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@users_table +POSTHOOK: query: load data local inpath '../../data/files/small_csv.csv' into table users_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@users_table +PREHOOK: query: explain +with all_hits as ( +select * from users_table +), +all_exposed_users as ( +select distinct +field_1, +field_2 +from all_hits +where field_3 +), 
+interacted as ( +select distinct +field_1, +field_2 +from all_hits +where field_4 +) +select +all_exposed_users.field_1, +count(*) as nr_exposed, +sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted +from all_exposed_users +left outer join interacted +on all_exposed_users.field_1 = interacted.field_1 +and all_exposed_users.field_2 = interacted.field_2 +group by all_exposed_users.field_1 +order by all_exposed_users.field_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@users_table +#### A masked pattern was here #### +POSTHOOK: query: explain +with all_hits as ( +select * from users_table +), +all_exposed_users as ( +select distinct +field_1, +field_2 +from all_hits +where field_3 +), +interacted as ( +select distinct +field_1, +field_2 +from all_hits +where field_4 +) +select +all_exposed_users.field_1, +count(*) as nr_exposed, +sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted +from all_exposed_users +left outer join interacted +on all_exposed_users.field_1 = interacted.field_1 +and all_exposed_users.field_2 = interacted.field_2 +group by all_exposed_users.field_1 +order by all_exposed_users.field_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@users_table +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 is a root stage + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: users_table + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: field_3 (type: boolean) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: field_1 (type: int), field_2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + keys: _col0 (type: int), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(if(_col3 
is not null, 1, 0)) + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: users_table + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: field_4 (type: boolean) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: field_1 (type: int), field_2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: with all_hits as ( +select * from users_table +), 
+all_exposed_users as ( +select distinct +field_1, +field_2 +from all_hits +where field_3 +), +interacted as ( +select distinct +field_1, +field_2 +from all_hits +where field_4 +) +select +all_exposed_users.field_1, +count(*) as nr_exposed, +sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted +from all_exposed_users +left outer join interacted +on all_exposed_users.field_1 = interacted.field_1 +and all_exposed_users.field_2 = interacted.field_2 +group by all_exposed_users.field_1 +order by all_exposed_users.field_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@users_table +#### A masked pattern was here #### +POSTHOOK: query: with all_hits as ( +select * from users_table +), +all_exposed_users as ( +select distinct +field_1, +field_2 +from all_hits +where field_3 +), +interacted as ( +select distinct +field_1, +field_2 +from all_hits +where field_4 +) +select +all_exposed_users.field_1, +count(*) as nr_exposed, +sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted +from all_exposed_users +left outer join interacted +on all_exposed_users.field_1 = interacted.field_1 +and all_exposed_users.field_2 = interacted.field_2 +group by all_exposed_users.field_1 +order by all_exposed_users.field_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@users_table +#### A masked pattern was here #### +31651 1 1 +216051 1 0 +239413 1 1 +252028 1 0 +269414 1 0 +300632 1 1 +301813 1 0 +409611 1 1 +438598 1 0 +453386 1 0 +474061 1 1 +537646 1 0 +575004 1 1 +676296 1 1 +715496 1 1 +750354 1 0 +804209 1 1 +807477 1 0 diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown_HIVE_15647.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown_HIVE_15647.q.out new file mode 100644 index 0000000000..54f28df314 --- /dev/null +++ b/ql/src/test/results/clientpositive/filter_cond_pushdown_HIVE_15647.q.out @@ -0,0 +1,424 @@ +PREHOOK: query: CREATE TABLE sales_HIVE_15647 (store_id INTEGER, store_number INTEGER, customer_id INTEGER) +PREHOOK: type: CREATETABLE 
+PREHOOK: Output: database:default +PREHOOK: Output: default@sales_HIVE_15647 +POSTHOOK: query: CREATE TABLE sales_HIVE_15647 (store_id INTEGER, store_number INTEGER, customer_id INTEGER) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sales_HIVE_15647 +PREHOOK: query: CREATE TABLE store_HIVE_15647 (store_id INTEGER, salad_bar BOOLEAN) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_HIVE_15647 +POSTHOOK: query: CREATE TABLE store_HIVE_15647 (store_id INTEGER, salad_bar BOOLEAN) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_HIVE_15647 +PREHOOK: query: explain select count(*) from +sales_HIVE_15647 as sales +join store_HIVE_15647 as store on sales.store_id = store.store_id +where ((store.salad_bar)) and ((sales.store_number) <=> (sales.customer_id)) +PREHOOK: type: QUERY +PREHOOK: Input: default@sales_hive_15647 +PREHOOK: Input: default@store_hive_15647 +#### A masked pattern was here #### +POSTHOOK: query: explain select count(*) from +sales_HIVE_15647 as sales +join store_HIVE_15647 as store on sales.store_id = store.store_id +where ((store.salad_bar)) and ((sales.store_number) <=> (sales.customer_id)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sales_hive_15647 +POSTHOOK: Input: default@store_hive_15647 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: sales + filterExpr: ((store_number IS NOT DISTINCT FROM customer_id) and store_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((store_number IS NOT DISTINCT FROM customer_id) and store_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: store_id (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: store_id (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: store + filterExpr: (store_id is not null and salad_bar) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (store_id is not null and salad_bar) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: store_id (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: store_id (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 store_id (type: int) + 1 store_id (type: int) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator 
+ compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(*) from +sales_HIVE_15647 as sales +join store_HIVE_15647 as store on sales.store_id = store.store_id +where ((store.salad_bar)) and ((sales.store_number) = (sales.customer_id)) +PREHOOK: type: QUERY +PREHOOK: Input: default@sales_hive_15647 +PREHOOK: Input: default@store_hive_15647 +#### A masked pattern was here #### +POSTHOOK: query: explain select count(*) from +sales_HIVE_15647 as sales +join store_HIVE_15647 as store on sales.store_id = store.store_id +where ((store.salad_bar)) and ((sales.store_number) = (sales.customer_id)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sales_hive_15647 +POSTHOOK: Input: default@store_hive_15647 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: sales + filterExpr: ((store_number = customer_id) and store_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((store_number = customer_id) and store_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: store_id (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: store_id (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: store + filterExpr: (store_id is not null and salad_bar) (type: 
boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (store_id is not null and salad_bar) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: store_id (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: store_id (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 store_id (type: int) + 1 store_id (type: int) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(*) from +sales_HIVE_15647 as sales +join store_HIVE_15647 as store on sales.store_id = store.store_id +where ((store.salad_bar = true)) and ((sales.store_number) <=> (sales.customer_id)) +PREHOOK: type: QUERY +PREHOOK: Input: default@sales_hive_15647 +PREHOOK: Input: default@store_hive_15647 +#### A masked pattern was here #### +POSTHOOK: query: explain select count(*) from +sales_HIVE_15647 as sales +join store_HIVE_15647 as store on sales.store_id = store.store_id +where ((store.salad_bar = true)) and ((sales.store_number) <=> (sales.customer_id)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sales_hive_15647 +POSTHOOK: Input: default@store_hive_15647 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: sales + filterExpr: ((store_number IS NOT DISTINCT FROM customer_id) and store_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((store_number IS NOT DISTINCT FROM customer_id) and store_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: store_id (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: store_id (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: store + filterExpr: (store_id is not null and (salad_bar = true)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (store_id is not null and (salad_bar = true)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: store_id (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: store_id (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 store_id (type: int) + 1 store_id (type: int) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(*) from +sales_HIVE_15647 as sales +join store_HIVE_15647 as store on sales.store_id = store.store_id +where ((store.salad_bar = false)) and ((sales.store_number) > 
(sales.customer_id)) +PREHOOK: type: QUERY +PREHOOK: Input: default@sales_hive_15647 +PREHOOK: Input: default@store_hive_15647 +#### A masked pattern was here #### +POSTHOOK: query: explain select count(*) from +sales_HIVE_15647 as sales +join store_HIVE_15647 as store on sales.store_id = store.store_id +where ((store.salad_bar = false)) and ((sales.store_number) > (sales.customer_id)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sales_hive_15647 +POSTHOOK: Input: default@store_hive_15647 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: sales + filterExpr: (store_id is not null and (store_number > customer_id)) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (store_id is not null and (store_number > customer_id)) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: store_id (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: store_id (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: store + filterExpr: (store_id is not null and (salad_bar = false)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (store_id is not null and (salad_bar = false)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: store_id (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: store_id (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 
0 to 1 + keys: + 0 store_id (type: int) + 1 store_id (type: int) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/filter_in_or_dup.q.out b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out new file mode 100644 index 0000000000..7bb40461c8 --- /dev/null +++ b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out @@ -0,0 +1,156 @@ +PREHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2') +AND f.key IN ('1', '2') +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t1 +PREHOOK: Input: default@cbo_t1@dt=2014 +#### A masked pattern was 
here #### +POSTHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2') +AND f.key IN ('1', '2') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t1 +POSTHOOK: Input: default@cbo_t1@dt=2014 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + filterExpr: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key = '2') +AND f.key IN ('1', '2', '3') +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t1 +PREHOOK: Input: default@cbo_t1@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key = '2') +AND f.key IN ('1', '2', '3') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t1 +POSTHOOK: Input: default@cbo_t1@dt=2014 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + 
TableScan + alias: f + filterExpr: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2' OR f.key='3') +AND f.key IN ('1', '2') +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t1 +PREHOOK: Input: default@cbo_t1@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2' OR f.key='3') +AND f.key IN ('1', '2') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t1 +POSTHOOK: Input: default@cbo_t1@dt=2014 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + filterExpr: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + 
Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/filter_numeric.q.out b/ql/src/test/results/clientpositive/filter_numeric.q.out similarity index 97% rename from ql/src/test/results/clientpositive/llap/filter_numeric.q.out rename to ql/src/test/results/clientpositive/filter_numeric.q.out index 19ce226560..2b3c586874 100644 --- a/ql/src/test/results/clientpositive/llap/filter_numeric.q.out +++ b/ql/src/test/results/clientpositive/filter_numeric.q.out @@ -43,11 +43,14 @@ STAGE PLANS: TableScan alias: partint filterExpr: (hr < 11) (type: boolean) + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (hr < 11) (type: boolean) + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select key, value, hr from partint where hr < 11 @@ -80,9 +83,11 @@ STAGE PLANS: TableScan alias: partint filterExpr: ((hr <= 12) and (hr > 11)) (type: boolean) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column 
stats: COMPLETE ListSink PREHOOK: query: select key, value, hr from partint where hr <= 12 and hr > 11 @@ -619,9 +624,11 @@ STAGE PLANS: TableScan alias: partint filterExpr: hr BETWEEN 11 AND 12 (type: boolean) + Statistics: Num rows: 1000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select key, value, hr from partint where hr between 11 and 12 @@ -1658,9 +1665,11 @@ STAGE PLANS: TableScan alias: partint filterExpr: hr NOT BETWEEN 12 AND 14 (type: boolean) + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select key, value, hr from partint where hr not between 12 and 14 @@ -2197,9 +2206,11 @@ STAGE PLANS: TableScan alias: partint filterExpr: (hr < 13) (type: boolean) + Statistics: Num rows: 1000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select key, value, hr from partint where hr < 13 diff --git a/ql/src/test/results/clientpositive/flatten_and_or.q.out b/ql/src/test/results/clientpositive/flatten_and_or.q.out new file mode 100644 index 0000000000..f39a1a1c33 --- /dev/null +++ b/ql/src/test/results/clientpositive/flatten_and_or.q.out @@ -0,0 +1,72 @@ +PREHOOK: query: explain +SELECT key +FROM src +WHERE + ((key = '0' + AND value = '8') OR (key = '1' + AND value = '5') OR (key = 
'2' + AND value = '6') OR (key = '3' + AND value = '8') OR (key = '4' + AND value = '1') OR (key = '5' + AND value = '6') OR (key = '6' + AND value = '1') OR (key = '7' + AND value = '1') OR (key = '8' + AND value = '1') OR (key = '9' + AND value = '1') OR (key = '10' + AND value = '3')) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT key +FROM src +WHERE + ((key = '0' + AND value = '8') OR (key = '1' + AND value = '5') OR (key = '2' + AND value = '6') OR (key = '3' + AND value = '8') OR (key = '4' + AND value = '1') OR (key = '5' + AND value = '6') OR (key = '6' + AND value = '1') OR (key = '7' + AND value = '1') OR (key = '8' + AND value = '1') OR (key = '9' + AND value = '1') OR (key = '10' + AND value = '3')) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean) 
+ Statistics: Num rows: 11 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 957 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 957 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/floor_time.q.out b/ql/src/test/results/clientpositive/floor_time.q.out similarity index 72% rename from ql/src/test/results/clientpositive/llap/floor_time.q.out rename to ql/src/test/results/clientpositive/floor_time.q.out index c94c4c161e..010d437778 100644 --- a/ql/src/test/results/clientpositive/llap/floor_time.q.out +++ b/ql/src/test/results/clientpositive/floor_time.q.out @@ -47,19 +47,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@extract_udf_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: extract_udf_n0 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: floor_day(t) (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: extract_udf_n0 - Select Operator - expressions: floor_day(t) (type: timestamp) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: select floor_day(t) from extract_udf_n0 @@ -85,19 +100,34 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@extract_udf_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: extract_udf_n0 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: floor_day(t) (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: extract_udf_n0 - Select Operator - expressions: floor_day(t) (type: timestamp) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: select floor(t to day) from extract_udf_n0 diff --git a/ql/src/test/results/clientpositive/llap/fm-sketch.q.out b/ql/src/test/results/clientpositive/fm-sketch.q.out similarity index 75% rename from ql/src/test/results/clientpositive/llap/fm-sketch.q.out rename to ql/src/test/results/clientpositive/fm-sketch.q.out index f77a6db39a..921546eb64 100644 --- a/ql/src/test/results/clientpositive/llap/fm-sketch.q.out +++ b/ql/src/test/results/clientpositive/fm-sketch.q.out 
@@ -27,55 +27,45 @@ POSTHOOK: Output: default@n_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: n_n0 - Statistics: Num rows: 500 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'fm', 16) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: n_n0 + Statistics: Num rows: 500 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(VALUE._col0) - mode: mergepartial + aggregations: compute_stats(key, 'fm', 16) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Stats Work Basic Stats Work: Column Stats Desc: @@ -141,55 +131,45 @@ POSTHOOK: Output: default@i_n1 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: i_n1 - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'fm', 16) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - 
Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: i_n1 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(VALUE._col0) - mode: mergepartial + aggregations: compute_stats(key, 'fm', 16) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: 
Stage-2 + Stage: Stage-1 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/fold_case.q.out b/ql/src/test/results/clientpositive/fold_case.q.out new file mode 100644 index 0000000000..5939a034ce --- /dev/null +++ b/ql/src/test/results/clientpositive/fold_case.q.out @@ -0,0 +1,605 @@ +PREHOOK: query: explain +select count(1) from src where (case key when '238' then true else false end) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select count(1) from src where (case key when '238' then true else false end) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '238') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(1) from src where (case key when '238' then 1=2 else 1=1 end) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select count(1) from src where (case key when '238' then 1=2 else 1=1 end) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '238') is not true (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') is not true (type: boolean) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(1) from src where (case key when '238' then 1=2 else 1=31 end) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select count(1) from src where (case key when '238' then 1=2 else 1=31 end) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(1) from src where (case key when '238' then true else 1=1 end) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select count(1) from src where (case key when '238' then true else 1=1 end) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(1) from src where (case key when '238' then 1=1 else 1=null end) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select count(1) from src where (case key when '238' then 1=1 else 1=null end) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '238') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(1) from src where (case key when '238' then 1=null end) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select count(1) from src where (case key when '238' then 1=null end) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 
(type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(1) from src where (case key when '238' then 2 = cast('2' as bigint) end) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select count(1) from src where (case key when '238' then 2 = cast('2' as bigint) end) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '238') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 
(type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select (case key when '238' then null else false end) from src where (case key when '238' then 2 = cast('1' as bigint) else true end) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select (case key when '238' then null else false end) from src where (case key when '238' then 2 = cast('1' as bigint) else true end) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '238') is not true (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') is not true (type: boolean) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ((key = '238') is true and null) (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + table: + 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select (case key when '238' then null else null end) from src where (case key when '238' then 2 = null else 3 = null end) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select (case key when '238' then null else null end) from src where (case key when '238' then 2 = null else 3 = null end) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: void) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain +select count(1) from src where (case key when '238' then null else 1=1 end) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select count(1) from src where (case key when '238' then null else 1=1 end) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = 
'238') is not true (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') is not true (type: boolean) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select (CASE WHEN (-2) >= 0 THEN SUBSTRING(key, 1,CAST((-2) AS INT)) ELSE NULL END) +from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select (CASE WHEN (-2) >= 0 THEN SUBSTRING(key, 1,CAST((-2) AS INT)) ELSE NULL END) +from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: 
src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain +select (CASE WHEN key = value THEN '1' WHEN true THEN '0' ELSE NULL END) +from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select (CASE WHEN key = value THEN '1' WHEN true THEN '0' ELSE NULL END) +from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((key = value)) THEN ('1') ELSE ('0') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select (case when true then key when not true then to_date(key) else null end) from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select (case when true then key when not true then to_date(key) else null end) from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was 
here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + diff --git a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out new file mode 100644 index 0000000000..dda4be8eb1 --- /dev/null +++ b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out @@ -0,0 +1,239 @@ +PREHOOK: query: explain +SELECT +SUM((CASE WHEN 1000000 = 0 THEN NULL ELSE l_partkey / 1000000 END)), +SUM(1) AS `sum_number_of_records_ok` FROM lineitem +WHERE +(((CASE WHEN ('N' = l_returnflag) THEN 1 ELSE 1 END) = 1) AND +((CASE WHEN ('MAIL' = l_shipmode) THEN 1 ELSE 1 END) = 1) AND +((CASE WHEN ('O' = l_linestatus) THEN 1 ELSE 1 END) = 1) AND +((CASE WHEN ('NONE' = l_shipinstruct) THEN 1 ELSE 1 END) = 1) AND +((CASE WHEN ('All' = (CASE WHEN (l_shipmode = 'TRUCK') THEN 'East' WHEN (l_shipmode = 'MAIL') THEN 'West' WHEN (l_shipmode = 'REG AIR') THEN 'BizDev' ELSE 'Other' END)) THEN 1 ELSE 1 END) = 1) AND +((CASE WHEN ('AIR' = l_shipmode) THEN 1 ELSE 1 END) = 1) AND +((CASE WHEN ('1996-03-30' = TO_DATE(l_shipdate)) THEN 1 ELSE NULL END) = 1) AND +((CASE WHEN ('RAIL' = l_shipmode) THEN 1 ELSE NULL END) = 1) AND (1 = 1) AND +((CASE WHEN (1 = l_linenumber) THEN 1 ELSE 1 END) = 1) AND (1 = 1)) +GROUP BY l_orderkey +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT +SUM((CASE WHEN 1000000 = 0 THEN NULL ELSE l_partkey / 1000000 END)), +SUM(1) AS `sum_number_of_records_ok` FROM lineitem +WHERE +(((CASE WHEN ('N' = l_returnflag) THEN 1 ELSE 1 END) = 1) AND +((CASE WHEN ('MAIL' = 
l_shipmode) THEN 1 ELSE 1 END) = 1) AND +((CASE WHEN ('O' = l_linestatus) THEN 1 ELSE 1 END) = 1) AND +((CASE WHEN ('NONE' = l_shipinstruct) THEN 1 ELSE 1 END) = 1) AND +((CASE WHEN ('All' = (CASE WHEN (l_shipmode = 'TRUCK') THEN 'East' WHEN (l_shipmode = 'MAIL') THEN 'West' WHEN (l_shipmode = 'REG AIR') THEN 'BizDev' ELSE 'Other' END)) THEN 1 ELSE 1 END) = 1) AND +((CASE WHEN ('AIR' = l_shipmode) THEN 1 ELSE 1 END) = 1) AND +((CASE WHEN ('1996-03-30' = TO_DATE(l_shipdate)) THEN 1 ELSE NULL END) = 1) AND +((CASE WHEN ('RAIL' = l_shipmode) THEN 1 ELSE NULL END) = 1) AND (1 = 1) AND +((CASE WHEN (1 = l_linenumber) THEN 1 ELSE 1 END) = 1) AND (1 = 1)) +GROUP BY l_orderkey +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem + filterExpr: ((l_shipmode = 'RAIL') and (DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP)))) (type: boolean) + Statistics: Num rows: 100 Data size: 19000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((l_shipmode = 'RAIL') and (DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP)))) (type: boolean) + Statistics: Num rows: 7 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l_orderkey (type: int), (UDFToDouble(l_partkey) / 1000000.0D) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1), sum(1) + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: 
_col0 (type: int) + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: double), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select key from src where (case key when '238' then 1 else 2 end) = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select key from src where (case key when '238' then 1 else 2 end) = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '238') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '238' (type: string) + outputColumnNames: _col0 + Statistics: 
Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select key from src where (case key when '238' then 1 when '94' then 1 else 3 end) = cast('1' as int) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select key from src where (case key when '238' then 1 when '94' then 1 else 3 end) = cast('1' as int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key) IN ('238', '94') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key) IN ('238', '94') (type: boolean) + Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + 
limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select key from src where (case key when '238' then 1 else 2 end) = (case when key != '238' then 1 else 1 end) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select key from src where (case key when '238' then 1 else 2 end) = (case when key != '238' then 1 else 1 end) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '238') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '238' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select key from src where (case key when '238' then 1 end) = (case when key != '238' then 1 when key = '23' then 1 end) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select key from src where (case key when '238' then 1 end) = (case when key != '238' then 1 when key = '23' then 1 end) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### 
A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/fold_to_null.q.out b/ql/src/test/results/clientpositive/fold_to_null.q.out new file mode 100644 index 0000000000..eaba925a42 --- /dev/null +++ b/ql/src/test/results/clientpositive/fold_to_null.q.out @@ -0,0 +1,265 @@ +PREHOOK: query: create table t (a int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (a int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: create table t2 (b int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2 (b int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: create table t3 (c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t3 +POSTHOOK: query: create table t3 (c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t3 +PREHOOK: query: insert into t values(3),(10) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values(3),(10) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.a SCRIPT [] +Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain select a from t,t2,t3 where + (a>3 and null between 0 and 10) is null +PREHOOK: type: QUERY +PREHOOK: Input: default@t +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked 
pattern was here #### +POSTHOOK: query: explain select a from t,t2,t3 where + (a>3 and null between 0 and 10) is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t + filterExpr: ((a > 3) and null) is null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((a > 3) and null) is null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + TableScan + alias: t3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output 
Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 5 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 6 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 6 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select a from t,t2,t3 where + (a>5 or null between 0 and 10) and (a*a < 101) + and t.a=t2.b and t.a=t3.c +PREHOOK: type: QUERY +PREHOOK: Input: default@t +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: explain select a from t,t2,t3 where + (a>5 or null between 0 and 10) and (a*a < 101) + and t.a=t2.b and t.a=t3.c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t + filterExpr: ((a > 5) and ((a * a) < 101)) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE + Filter Operator + predicate: ((a > 5) and ((a * a) < 101)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: t2 + filterExpr: ((b > 5) and ((b * b) < 101)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((b > 5) and ((b * b) < 101)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + 
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: t3 + filterExpr: ((c > 5) and ((c * c) < 101)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((c > 5) and ((c * c) < 101)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/fold_when.q.out b/ql/src/test/results/clientpositive/fold_when.q.out similarity index 54% rename from ql/src/test/results/clientpositive/llap/fold_when.q.out rename to ql/src/test/results/clientpositive/fold_when.q.out index 9390afd96f..8753d9fddf 100644 --- a/ql/src/test/results/clientpositive/llap/fold_when.q.out +++ b/ql/src/test/results/clientpositive/fold_when.q.out @@ -58,9 +58,11 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column 
stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain @@ -94,22 +96,38 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '238') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '238' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src - filterExpr: (key = '238') (type: boolean) - Filter Operator - predicate: (key = '238') (type: boolean) - Select Operator - expressions: '238' (type: string) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: explain select key from src where ((case when (key = '238') then 1=1 else null=1 end)) @@ -122,22 +140,38 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map 
Operator Tree: + TableScan + alias: src + filterExpr: (key = '238') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '238' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src - filterExpr: (key = '238') (type: boolean) - Filter Operator - predicate: (key = '238') (type: boolean) - Select Operator - expressions: '238' (type: string) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: explain select key from src where ((case when (key = '238') then 1=1 else 2=2 end)) @@ -159,9 +193,11 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain @@ -195,22 +231,38 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '238') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 
Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '238' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src - filterExpr: (key = '238') (type: boolean) - Filter Operator - predicate: (key = '238') (type: boolean) - Select Operator - expressions: '238' (type: string) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: explain select key from src where ((case when (key = '238') then 1=3 else 1=1 end)) @@ -223,22 +275,38 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '238') is not true (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '238') is not true (type: boolean) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 21750 Basic 
stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src - filterExpr: (key = '238') is not true (type: boolean) - Filter Operator - predicate: (key = '238') is not true (type: boolean) - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: explain select key from src where ((case when ('23' = '23') then 1 else 1 end) = 1) @@ -260,9 +328,11 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain @@ -285,9 +355,11 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain @@ -310,9 +382,11 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain @@ -326,22 +400,38 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + 
Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (key = '11') is not true (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '11') is not true (type: boolean) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src - filterExpr: (key = '11') is not true (type: boolean) - Filter Operator - predicate: (key = '11') is not true (type: boolean) - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - ListSink + ListSink PREHOOK: query: explain select key from src where ((case when (key = (case when (key = '238') then '12' else '11' end)) then 2=2 else true end)) @@ -363,8 +453,10 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/folder_predicate.q.out b/ql/src/test/results/clientpositive/folder_predicate.q.out new file mode 100644 index 0000000000..b4c4724a24 --- /dev/null +++ b/ql/src/test/results/clientpositive/folder_predicate.q.out @@ -0,0 +1,410 @@ +PREHOOK: 
query: drop table if exists predicate_fold_tb +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists predicate_fold_tb +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table predicate_fold_tb(value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@predicate_fold_tb +POSTHOOK: query: create table predicate_fold_tb(value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@predicate_fold_tb +PREHOOK: query: insert into predicate_fold_tb values(NULL), (1), (2), (3), (4), (5) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@predicate_fold_tb +POSTHOOK: query: insert into predicate_fold_tb values(NULL), (1), (2), (3), (4), (5) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@predicate_fold_tb +POSTHOOK: Lineage: predicate_fold_tb.value SCRIPT [] +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value = 3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value = 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + filterExpr: ((value <> 3) or value is null) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((value <> 3) or value is null) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value = 3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value = 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +NULL +1 +2 +4 +5 +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value >= 3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value >= 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + filterExpr: (value is null or (value < 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value is null or (value < 3)) (type: boolean) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value >= 3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value >= 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +NULL +1 +2 +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <= 3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <= 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + filterExpr: (value is null or (value > 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value is null or (value > 3)) (type: boolean) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: 
COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <= 3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <= 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +NULL +4 +5 +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + filterExpr: (value is null or (value <= 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value is null or (value <= 3)) (type: boolean) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +NULL +1 +2 +3 +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value < 3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value < 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + filterExpr: (value is null or (value >= 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value is null or (value >= 3)) (type: boolean) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value < 3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value < 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +NULL +3 +4 +5 +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <> 3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <> 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + filterExpr: (value is null or (value = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value is null or (value = 3)) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <> 3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <> 3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +NULL +3 +PREHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 1 AND value <=3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 1 AND value <=3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: predicate_fold_tb + filterExpr: (value is null or (value <= 1) or (value > 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value is null or (value <= 1) or (value > 3)) (type: boolean) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 1 AND value <=3) +PREHOOK: type: QUERY +PREHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 1 AND value <=3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@predicate_fold_tb +#### A masked pattern was here #### +NULL +1 +4 +5 diff --git a/ql/src/test/results/clientpositive/llap/foldts.q.out b/ql/src/test/results/clientpositive/foldts.q.out similarity index 62% rename from ql/src/test/results/clientpositive/llap/foldts.q.out rename to ql/src/test/results/clientpositive/foldts.q.out index 15c9610c5f..feda88c156 100644 --- a/ql/src/test/results/clientpositive/llap/foldts.q.out +++ b/ql/src/test/results/clientpositive/foldts.q.out @@ -14,30 +14,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctimestamp1 (type: timestamp), to_unix_timestamp(ctimestamp1) (type: bigint), to_unix_timestamp(ctimestamp1) (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 563568 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctimestamp1 (type: timestamp), to_unix_timestamp(ctimestamp1) (type: bigint), to_unix_timestamp(ctimestamp1) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12288 Data size: 563568 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -80,30 +76,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2260992 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2260992 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -136,30 +128,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2260992 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP 
IO: all inputs + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2260992 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/fp_literal_arithmetic.q.out b/ql/src/test/results/clientpositive/fp_literal_arithmetic.q.out new file mode 100644 index 0000000000..1d28b276e4 --- /dev/null +++ b/ql/src/test/results/clientpositive/fp_literal_arithmetic.q.out @@ -0,0 +1,370 @@ +PREHOOK: query: explain +select sum(l_extendedprice) from lineitem q0 where l_discount +between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: explain +select sum(l_extendedprice) from lineitem q0 where l_discount +between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: q0 + filterExpr: l_discount BETWEEN 0.05D AND 0.07D (type: boolean) + Statistics: Num rows: 100 Data size: 1600 Basic 
stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: l_discount BETWEEN 0.05D AND 0.07D (type: boolean) + Statistics: Num rows: 35 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l_extendedprice (type: double) + outputColumnNames: l_extendedprice + Statistics: Num rows: 35 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(l_extendedprice) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(l_extendedprice) from lineitem q0 where l_discount +between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select sum(l_extendedprice) from lineitem q0 where l_discount +between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +1201714.81 +PREHOOK: query: explain +select sum(l_extendedprice) 
from lineitem q1 where l_discount +between 0.06 - 0.01 and 0.06 + 0.01 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: explain +select sum(l_extendedprice) from lineitem q1 where l_discount +between 0.06 - 0.01 and 0.06 + 0.01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: q1 + filterExpr: l_discount BETWEEN 0.05D AND 0.07D (type: boolean) + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: l_discount BETWEEN 0.05D AND 0.07D (type: boolean) + Statistics: Num rows: 35 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l_extendedprice (type: double) + outputColumnNames: l_extendedprice + Statistics: Num rows: 35 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(l_extendedprice) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(l_extendedprice) from lineitem q1 where l_discount +between 0.06 - 0.01 and 0.06 + 0.01 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select sum(l_extendedprice) from lineitem q1 where l_discount +between 0.06 - 0.01 and 0.06 + 0.01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +1201714.81 +PREHOOK: query: select sum(l_extendedprice) from lineitem q2 where l_discount +between 0.05 and 0.07 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select sum(l_extendedprice) from lineitem q2 where l_discount +between 0.05 and 0.07 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +1201714.81 +PREHOOK: query: select sum(l_extendedprice) from lineitem q3 where l_discount +between (cast('0.06' as decimal(3,2)) - cast('0.01' as decimal(3,2))) + and (cast('0.06' as decimal(3,2)) + cast('0.01' as decimal(3,2))) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select sum(l_extendedprice) from lineitem q3 where l_discount +between (cast('0.06' as decimal(3,2)) - cast('0.01' as decimal(3,2))) + and (cast('0.06' as decimal(3,2)) + cast('0.01' as decimal(3,2))) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +1201714.81 +PREHOOK: query: select sum(l_extendedprice) from lineitem q4 where l_discount +between (cast('0.06' as decimal(3,2)) - cast('0.01' as double)) + and (cast('0.06' as decimal(3,2)) + cast('0.01' as double)) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select sum(l_extendedprice) from lineitem q4 where l_discount +between 
(cast('0.06' as decimal(3,2)) - cast('0.01' as double)) + and (cast('0.06' as decimal(3,2)) + cast('0.01' as double)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +960361.91 +PREHOOK: query: explain +select sum(l_extendedprice) from lineitem q10 where l_discount +between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: explain +select sum(l_extendedprice) from lineitem q10 where l_discount +between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: q10 + filterExpr: l_discount BETWEEN CAST( '0.05' AS decimal(3,2)) AND CAST( '0.07' AS decimal(3,2)) (type: boolean) + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: l_discount BETWEEN CAST( '0.05' AS decimal(3,2)) AND CAST( '0.07' AS decimal(3,2)) (type: boolean) + Statistics: Num rows: 11 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l_extendedprice (type: double) + outputColumnNames: l_extendedprice + Statistics: Num rows: 11 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(l_extendedprice) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: 
sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(l_extendedprice) from lineitem q10 where l_discount +between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select sum(l_extendedprice) from lineitem q10 where l_discount +between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +1201714.81 +PREHOOK: query: explain +select sum(l_extendedprice) from lineitem q11 where l_discount +between 0.06 - 0.01 and 0.06 + 0.01 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: explain +select sum(l_extendedprice) from lineitem q11 where l_discount +between 0.06 - 0.01 and 0.06 + 0.01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: q11 + filterExpr: l_discount BETWEEN (0.06 - 0.01) AND (0.06 + 0.01) (type: boolean) + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: l_discount BETWEEN (0.06 - 0.01) AND (0.06 + 0.01) (type: boolean) + Statistics: Num rows: 11 Data size: 176 Basic 
stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l_extendedprice (type: double) + outputColumnNames: l_extendedprice + Statistics: Num rows: 11 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(l_extendedprice) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(l_extendedprice) from lineitem q11 where l_discount +between 0.06 - 0.01 and 0.06 + 0.01 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select sum(l_extendedprice) from lineitem q11 where l_discount +between 0.06 - 0.01 and 0.06 + 0.01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +1201714.81 +PREHOOK: query: select sum(l_extendedprice) from lineitem q12 where l_discount +between 0.05 and 0.07 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select sum(l_extendedprice) from lineitem q12 where l_discount +between 0.05 
and 0.07 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +1201714.81 +PREHOOK: query: select sum(l_extendedprice) from lineitem q13 where l_discount +between (cast('0.06' as decimal(3,2)) - cast('0.01' as decimal(3,2))) + and (cast('0.06' as decimal(3,2)) + cast('0.01' as decimal(3,2))) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select sum(l_extendedprice) from lineitem q13 where l_discount +between (cast('0.06' as decimal(3,2)) - cast('0.01' as decimal(3,2))) + and (cast('0.06' as decimal(3,2)) + cast('0.01' as decimal(3,2))) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +1201714.81 +PREHOOK: query: select sum(l_extendedprice) from lineitem q14 where l_discount +between (cast('0.06' as decimal(3,2)) - cast('0.01' as double)) + and (cast('0.06' as decimal(3,2)) + cast('0.01' as double)) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select sum(l_extendedprice) from lineitem q14 where l_discount +between (cast('0.06' as decimal(3,2)) - cast('0.01' as double)) + and (cast('0.06' as decimal(3,2)) + cast('0.01' as double)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +960361.91 diff --git a/ql/src/test/results/clientpositive/gby_star.q.out b/ql/src/test/results/clientpositive/gby_star.q.out new file mode 100644 index 0000000000..33ac8c32ae --- /dev/null +++ b/ql/src/test/results/clientpositive/gby_star.q.out @@ -0,0 +1,381 @@ +PREHOOK: query: explain +select *, sum(key) from src group by key, value limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select *, sum(key) from src group by key, value limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 
is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(key) + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1860 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1860 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select *, sum(key) from src group by key, value limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@src +#### A masked pattern was here #### +POSTHOOK: query: select *, sum(key) from src group by key, value limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0.0 +10 val_10 10.0 +100 val_100 200.0 +103 val_103 206.0 +104 val_104 208.0 +105 val_105 105.0 +11 val_11 11.0 +111 val_111 111.0 +113 val_113 226.0 +114 val_114 114.0 +PREHOOK: query: explain +select *, sum(key) from src where key < 100 group by key, value limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select *, sum(key) from src where key < 100 group by key, value limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(key) + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), 
KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1860 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1860 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select *, sum(key) from src where key < 100 group by key, value limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select *, sum(key) from src where key < 100 group by key, value limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0.0 +10 val_10 10.0 +11 val_11 11.0 +12 val_12 24.0 +15 val_15 30.0 +17 val_17 17.0 +18 val_18 36.0 +19 val_19 19.0 +2 val_2 2.0 +20 val_20 20.0 +PREHOOK: query: explain +select *, sum(key) from (select key from src where key < 100) a group by key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select *, sum(key) from (select key from src where key < 100) a group by key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: 
(UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(key) + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select *, sum(key) from (select key from src where key < 100) a group by key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select *, sum(key) from (select key from src where key < 100) a group by key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 +10 10.0 +11 11.0 +12 24.0 +15 30.0 +17 17.0 +18 36.0 +19 19.0 +2 2.0 +20 20.0 +PREHOOK: query: explain +select a.*, sum(src.key) from 
(select key from src where key < 100) a +inner join src on a.key = src.key group by a.key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain +select a.*, sum(src.key) from (select key from src where key < 100) a +inner join src on a.key = src.key group by a.key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src + filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key) < 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce 
partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 28884 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select a.*, sum(src.key) from (select key from src where key < 100) a +inner join src on a.key = src.key group by a.key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.*, sum(src.key) from (select key from src where key < 100) a +inner join src on a.key = src.key group by a.key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 +10 10.0 +11 11.0 +12 48.0 +15 60.0 +17 17.0 +18 72.0 +19 19.0 +2 2.0 +20 20.0 diff --git a/ql/src/test/results/clientpositive/groupby10.q.out b/ql/src/test/results/clientpositive/groupby10.q.out new file mode 100644 index 0000000000..583f2fbc28 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby10.q.out @@ -0,0 +1,1073 @@ +PREHOOK: query: CREATE TABLE dest1_n0(key INT, val1 INT, val2 INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n0 +POSTHOOK: query: CREATE TABLE dest1_n0(key INT, val1 INT, val2 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n0 +PREHOOK: query: CREATE TABLE dest2(key INT, val1 INT, val2 INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest2 +POSTHOOK: query: CREATE TABLE dest2(key INT, val1 INT, val2 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest2 +PREHOOK: query: CREATE TABLE INPUT(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@INPUT +POSTHOOK: query: CREATE TABLE INPUT(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@INPUT +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/kv5.txt' INTO TABLE INPUT +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@input +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' INTO TABLE INPUT +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@input +PREHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +PREHOOK: type: QUERY +PREHOOK: Input: default@input +PREHOOK: Output: default@dest1_n0 +PREHOOK: Output: default@dest2 +POSTHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input +POSTHOOK: Output: default@dest1_n0 +POSTHOOK: Output: default@dest2 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0, Stage-6, Stage-11 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-5 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-11 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: input + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), substr(value, 5) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
_col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n0 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n0 + + Stage: Stage-4 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1_n0 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + 
mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int), substr(value, 5) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: 
partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num 
rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +PREHOOK: type: QUERY +PREHOOK: Input: default@input +PREHOOK: Output: default@dest1_n0 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input +POSTHOOK: Output: default@dest1_n0 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1_n0.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1_n0.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1_n0.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: SELECT * from dest1_n0 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest1_n0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n0 
+#### A masked pattern was here #### +128 1 1 +150 1 1 +165 1 1 +193 1 1 +213 3 2 +224 1 1 +238 3 3 +255 1 1 +265 1 1 +27 1 1 +273 1 1 +278 1 1 +311 1 1 +369 1 1 +401 1 1 +409 1 1 +484 1 1 +66 1 1 +86 1 1 +98 1 1 +PREHOOK: query: SELECT * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +128 128 128 +150 150 150 +165 165 165 +193 193 193 +213 640 427 +224 224 224 +238 717 717 +255 255 255 +265 265 265 +27 27 27 +273 273 273 +278 278 278 +311 311 311 +369 369 369 +401 401 401 +409 409 409 +484 484 484 +66 66 66 +86 86 86 +98 98 98 +PREHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +PREHOOK: type: QUERY +PREHOOK: Input: default@input +PREHOOK: Output: default@dest1_n0 +PREHOOK: Output: default@dest2 +POSTHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input +POSTHOOK: Output: default@dest1_n0 +POSTHOOK: Output: default@dest2 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0, Stage-6, Stage-11 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-5 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-11 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + 
Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: input + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), substr(value, 5) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n0 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n0 + + Stage: Stage-4 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1_n0 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort 
order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key 
expressions: key (type: int), substr(value, 5) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), 
_col1 (type: struct), _col2 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +PREHOOK: type: QUERY +PREHOOK: Input: default@input +PREHOOK: Output: default@dest1_n0 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input +POSTHOOK: Output: default@dest1_n0 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1_n0.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1_n0.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1_n0.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION 
[(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: SELECT * from dest1_n0 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest1_n0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n0 +#### A masked pattern was here #### +128 1 1 +150 1 1 +165 1 1 +193 1 1 +213 3 2 +224 1 1 +238 3 3 +255 1 1 +265 1 1 +27 1 1 +273 1 1 +278 1 1 +311 1 1 +369 1 1 +401 1 1 +409 1 1 +484 1 1 +66 1 1 +86 1 1 +98 1 1 +PREHOOK: query: SELECT * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +128 128 128 +150 150 150 +165 165 165 +193 193 193 +213 640 427 +224 224 224 +238 717 717 +255 255 255 +265 265 265 +27 27 27 +273 273 273 +278 278 278 +311 311 311 +369 369 369 +401 401 401 +409 409 409 +484 484 484 +66 66 66 +86 86 86 +98 98 98 +PREHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +PREHOOK: type: QUERY +PREHOOK: Input: default@input +PREHOOK: Output: default@dest1_n0 +PREHOOK: Output: default@dest2 +POSTHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input +POSTHOOK: Output: default@dest1_n0 +POSTHOOK: 
Output: default@dest2 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 + Stage-1 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: input + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), substr(value, 5) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Forward + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n0 + Select Operator + expressions: _col0 
(type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n0 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1_n0 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +PREHOOK: type: QUERY +PREHOOK: Input: default@input +PREHOOK: Output: default@dest1_n0 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input +POSTHOOK: Output: default@dest1_n0 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1_n0.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1_n0.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1_n0.val2 EXPRESSION [(input)input.null, ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION 
[(input)input.null, ] +PREHOOK: query: SELECT * from dest1_n0 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest1_n0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n0 +#### A masked pattern was here #### +128 128 1 +150 150 1 +165 165 1 +193 193 1 +213 427 2 +224 224 1 +238 717 3 +255 255 1 +265 265 1 +27 27 1 +273 273 1 +278 278 1 +311 311 1 +369 369 1 +401 401 1 +409 409 1 +484 484 1 +66 66 1 +86 86 1 +98 98 1 +PREHOOK: query: SELECT * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +128 128 128 +150 150 150 +165 165 165 +193 193 193 +213 427 213 +224 224 224 +238 717 239 +255 255 255 +265 265 265 +27 27 27 +273 273 273 +278 278 278 +311 311 311 +369 369 369 +401 401 401 +409 409 409 +484 484 484 +66 66 66 +86 86 86 +98 98 98 diff --git a/ql/src/test/results/clientpositive/groupby11.q.out b/ql/src/test/results/clientpositive/groupby11.q.out new file mode 100644 index 0000000000..4b62238506 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby11.q.out @@ -0,0 +1,1017 @@ +PREHOOK: query: CREATE TABLE dest1_n137(key STRING, val1 INT, val2 INT) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n137 +POSTHOOK: query: CREATE TABLE dest1_n137(key STRING, val1 INT, val2 INT) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n137 +PREHOOK: query: CREATE TABLE dest2_n36(key STRING, val1 INT, val2 INT) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest2_n36 +POSTHOOK: query: CREATE TABLE dest2_n36(key STRING, val1 INT, val2 INT) partitioned by (ds string) +POSTHOOK: type: CREATETABLE 
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest2_n36 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n137 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value +INSERT OVERWRITE TABLE dest2_n36 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n137@ds=111 +PREHOOK: Output: default@dest2_n36@ds=111 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n137 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value +INSERT OVERWRITE TABLE dest2_n36 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n137@ds=111 +POSTHOOK: Output: default@dest2_n36@ds=111 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0, Stage-6, Stage-11 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-5 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-11 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: value (type: string), key (type: string) + null sort order: zz + sort order: ++ 
+ Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 53500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 53500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 307 Data size: 32849 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 307 Data size: 30393 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 307 Data size: 30393 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n137 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 307 Data size: 57102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 111 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n137 + + Stage: Stage-4 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest1_n137 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 307 Data size: 57102 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), 
compute_stats(VALUE._col3, 'hll') + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest2_n36 + + Stage: Stage-7 + 
Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: substr(value, 5) (type: string), key (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 307 Data size: 31314 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 307 Data size: 28858 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 307 Data size: 28858 Basic stats: COMPLETE Column stats: COMPLETE + table: + 
input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n36 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 307 Data size: 55567 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Move Operator + tables: + partition: + ds 111 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n36 + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 307 Data size: 55567 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n137 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value +INSERT OVERWRITE TABLE dest2_n36 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n137@ds=111 +PREHOOK: Output: default@dest2_n36@ds=111 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n137 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) 
GROUP BY src.value +INSERT OVERWRITE TABLE dest2_n36 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n137@ds=111 +POSTHOOK: Output: default@dest2_n36@ds=111 +POSTHOOK: Lineage: dest1_n137 PARTITION(ds=111).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n137 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n137 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2_n36 PARTITION(ds=111).key EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2_n36 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2_n36 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * from dest1_n137 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n137 +PREHOOK: Input: default@dest1_n137@ds=111 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest1_n137 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n137 +POSTHOOK: Input: default@dest1_n137@ds=111 +#### A masked pattern was here #### +val_0 3 1 111 +val_10 1 1 111 +val_100 2 1 111 +val_103 2 1 111 +val_104 2 1 111 +val_105 1 1 111 +val_11 1 1 111 +val_111 1 1 111 +val_113 2 1 111 +val_114 1 1 111 +val_116 1 1 111 +val_118 2 1 111 +val_119 3 1 111 +val_12 2 1 111 +val_120 2 1 111 +val_125 2 1 111 +val_126 1 1 111 +val_128 3 1 111 +val_129 2 1 111 +val_131 1 1 111 +val_133 1 1 111 +val_134 2 1 111 +val_136 1 1 111 +val_137 2 1 111 +val_138 4 1 111 +val_143 1 1 111 +val_145 1 1 111 +val_146 2 1 111 +val_149 2 1 111 +val_15 2 1 111 +val_150 1 1 111 +val_152 2 1 111 
+val_153 1 1 111 +val_155 1 1 111 +val_156 1 1 111 +val_157 1 1 111 +val_158 1 1 111 +val_160 1 1 111 +val_162 1 1 111 +val_163 1 1 111 +val_164 2 1 111 +val_165 2 1 111 +val_166 1 1 111 +val_167 3 1 111 +val_168 1 1 111 +val_169 4 1 111 +val_17 1 1 111 +val_170 1 1 111 +val_172 2 1 111 +val_174 2 1 111 +val_175 2 1 111 +val_176 2 1 111 +val_177 1 1 111 +val_178 1 1 111 +val_179 2 1 111 +val_18 2 1 111 +val_180 1 1 111 +val_181 1 1 111 +val_183 1 1 111 +val_186 1 1 111 +val_187 3 1 111 +val_189 1 1 111 +val_19 1 1 111 +val_190 1 1 111 +val_191 2 1 111 +val_192 1 1 111 +val_193 3 1 111 +val_194 1 1 111 +val_195 2 1 111 +val_196 1 1 111 +val_197 2 1 111 +val_199 3 1 111 +val_2 1 1 111 +val_20 1 1 111 +val_200 2 1 111 +val_201 1 1 111 +val_202 1 1 111 +val_203 2 1 111 +val_205 2 1 111 +val_207 2 1 111 +val_208 3 1 111 +val_209 2 1 111 +val_213 2 1 111 +val_214 1 1 111 +val_216 2 1 111 +val_217 2 1 111 +val_218 1 1 111 +val_219 2 1 111 +val_221 2 1 111 +val_222 1 1 111 +val_223 2 1 111 +val_224 2 1 111 +val_226 1 1 111 +val_228 1 1 111 +val_229 2 1 111 +val_230 5 1 111 +val_233 2 1 111 +val_235 1 1 111 +val_237 2 1 111 +val_238 2 1 111 +val_239 2 1 111 +val_24 2 1 111 +val_241 1 1 111 +val_242 2 1 111 +val_244 1 1 111 +val_247 1 1 111 +val_248 1 1 111 +val_249 1 1 111 +val_252 1 1 111 +val_255 2 1 111 +val_256 2 1 111 +val_257 1 1 111 +val_258 1 1 111 +val_26 2 1 111 +val_260 1 1 111 +val_262 1 1 111 +val_263 1 1 111 +val_265 2 1 111 +val_266 1 1 111 +val_27 1 1 111 +val_272 2 1 111 +val_273 3 1 111 +val_274 1 1 111 +val_275 1 1 111 +val_277 4 1 111 +val_278 2 1 111 +val_28 1 1 111 +val_280 2 1 111 +val_281 2 1 111 +val_282 2 1 111 +val_283 1 1 111 +val_284 1 1 111 +val_285 1 1 111 +val_286 1 1 111 +val_287 1 1 111 +val_288 2 1 111 +val_289 1 1 111 +val_291 1 1 111 +val_292 1 1 111 +val_296 1 1 111 +val_298 3 1 111 +val_30 1 1 111 +val_302 1 1 111 +val_305 1 1 111 +val_306 1 1 111 +val_307 2 1 111 +val_308 1 1 111 +val_309 2 1 111 +val_310 1 1 111 +val_311 3 1 111 
+val_315 1 1 111 +val_316 3 1 111 +val_317 2 1 111 +val_318 3 1 111 +val_321 2 1 111 +val_322 2 1 111 +val_323 1 1 111 +val_325 2 1 111 +val_327 3 1 111 +val_33 1 1 111 +val_331 2 1 111 +val_332 1 1 111 +val_333 2 1 111 +val_335 1 1 111 +val_336 1 1 111 +val_338 1 1 111 +val_339 1 1 111 +val_34 1 1 111 +val_341 1 1 111 +val_342 2 1 111 +val_344 2 1 111 +val_345 1 1 111 +val_348 5 1 111 +val_35 3 1 111 +val_351 1 1 111 +val_353 2 1 111 +val_356 1 1 111 +val_360 1 1 111 +val_362 1 1 111 +val_364 1 1 111 +val_365 1 1 111 +val_366 1 1 111 +val_367 2 1 111 +val_368 1 1 111 +val_369 3 1 111 +val_37 2 1 111 +val_373 1 1 111 +val_374 1 1 111 +val_375 1 1 111 +val_377 1 1 111 +val_378 1 1 111 +val_379 1 1 111 +val_382 2 1 111 +val_384 3 1 111 +val_386 1 1 111 +val_389 1 1 111 +val_392 1 1 111 +val_393 1 1 111 +val_394 1 1 111 +val_395 2 1 111 +val_396 3 1 111 +val_397 2 1 111 +val_399 2 1 111 +val_4 1 1 111 +val_400 1 1 111 +val_401 5 1 111 +val_402 1 1 111 +val_403 3 1 111 +val_404 2 1 111 +val_406 4 1 111 +val_407 1 1 111 +val_409 3 1 111 +val_41 1 1 111 +val_411 1 1 111 +val_413 2 1 111 +val_414 2 1 111 +val_417 3 1 111 +val_418 1 1 111 +val_419 1 1 111 +val_42 2 1 111 +val_421 1 1 111 +val_424 2 1 111 +val_427 1 1 111 +val_429 2 1 111 +val_43 1 1 111 +val_430 3 1 111 +val_431 3 1 111 +val_432 1 1 111 +val_435 1 1 111 +val_436 1 1 111 +val_437 1 1 111 +val_438 3 1 111 +val_439 2 1 111 +val_44 1 1 111 +val_443 1 1 111 +val_444 1 1 111 +val_446 1 1 111 +val_448 1 1 111 +val_449 1 1 111 +val_452 1 1 111 +val_453 1 1 111 +val_454 3 1 111 +val_455 1 1 111 +val_457 1 1 111 +val_458 2 1 111 +val_459 2 1 111 +val_460 1 1 111 +val_462 2 1 111 +val_463 2 1 111 +val_466 3 1 111 +val_467 1 1 111 +val_468 4 1 111 +val_469 5 1 111 +val_47 1 1 111 +val_470 1 1 111 +val_472 1 1 111 +val_475 1 1 111 +val_477 1 1 111 +val_478 2 1 111 +val_479 1 1 111 +val_480 3 1 111 +val_481 1 1 111 +val_482 1 1 111 +val_483 1 1 111 +val_484 1 1 111 +val_485 1 1 111 +val_487 1 1 111 +val_489 4 1 111 
+val_490 1 1 111 +val_491 1 1 111 +val_492 2 1 111 +val_493 1 1 111 +val_494 1 1 111 +val_495 1 1 111 +val_496 1 1 111 +val_497 1 1 111 +val_498 3 1 111 +val_5 3 1 111 +val_51 2 1 111 +val_53 1 1 111 +val_54 1 1 111 +val_57 1 1 111 +val_58 2 1 111 +val_64 1 1 111 +val_65 1 1 111 +val_66 1 1 111 +val_67 2 1 111 +val_69 1 1 111 +val_70 3 1 111 +val_72 2 1 111 +val_74 1 1 111 +val_76 2 1 111 +val_77 1 1 111 +val_78 1 1 111 +val_8 1 1 111 +val_80 1 1 111 +val_82 1 1 111 +val_83 2 1 111 +val_84 2 1 111 +val_85 1 1 111 +val_86 1 1 111 +val_87 1 1 111 +val_9 1 1 111 +val_90 3 1 111 +val_92 1 1 111 +val_95 2 1 111 +val_96 1 1 111 +val_97 2 1 111 +val_98 2 1 111 +PREHOOK: query: SELECT * from dest2_n36 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2_n36 +PREHOOK: Input: default@dest2_n36@ds=111 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest2_n36 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2_n36 +POSTHOOK: Input: default@dest2_n36@ds=111 +#### A masked pattern was here #### +0 3 1 111 +10 1 1 111 +100 2 1 111 +103 2 1 111 +104 2 1 111 +105 1 1 111 +11 1 1 111 +111 1 1 111 +113 2 1 111 +114 1 1 111 +116 1 1 111 +118 2 1 111 +119 3 1 111 +12 2 1 111 +120 2 1 111 +125 2 1 111 +126 1 1 111 +128 3 1 111 +129 2 1 111 +131 1 1 111 +133 1 1 111 +134 2 1 111 +136 1 1 111 +137 2 1 111 +138 4 1 111 +143 1 1 111 +145 1 1 111 +146 2 1 111 +149 2 1 111 +15 2 1 111 +150 1 1 111 +152 2 1 111 +153 1 1 111 +155 1 1 111 +156 1 1 111 +157 1 1 111 +158 1 1 111 +160 1 1 111 +162 1 1 111 +163 1 1 111 +164 2 1 111 +165 2 1 111 +166 1 1 111 +167 3 1 111 +168 1 1 111 +169 4 1 111 +17 1 1 111 +170 1 1 111 +172 2 1 111 +174 2 1 111 +175 2 1 111 +176 2 1 111 +177 1 1 111 +178 1 1 111 +179 2 1 111 +18 2 1 111 +180 1 1 111 +181 1 1 111 +183 1 1 111 +186 1 1 111 +187 3 1 111 +189 1 1 111 +19 1 1 111 +190 1 1 111 +191 2 1 111 +192 1 1 111 +193 3 1 111 +194 1 1 111 +195 2 1 111 +196 1 1 111 +197 2 1 111 +199 3 1 111 +2 1 1 111 +20 1 1 111 +200 2 1 111 +201 1 1 111 
+202 1 1 111 +203 2 1 111 +205 2 1 111 +207 2 1 111 +208 3 1 111 +209 2 1 111 +213 2 1 111 +214 1 1 111 +216 2 1 111 +217 2 1 111 +218 1 1 111 +219 2 1 111 +221 2 1 111 +222 1 1 111 +223 2 1 111 +224 2 1 111 +226 1 1 111 +228 1 1 111 +229 2 1 111 +230 5 1 111 +233 2 1 111 +235 1 1 111 +237 2 1 111 +238 2 1 111 +239 2 1 111 +24 2 1 111 +241 1 1 111 +242 2 1 111 +244 1 1 111 +247 1 1 111 +248 1 1 111 +249 1 1 111 +252 1 1 111 +255 2 1 111 +256 2 1 111 +257 1 1 111 +258 1 1 111 +26 2 1 111 +260 1 1 111 +262 1 1 111 +263 1 1 111 +265 2 1 111 +266 1 1 111 +27 1 1 111 +272 2 1 111 +273 3 1 111 +274 1 1 111 +275 1 1 111 +277 4 1 111 +278 2 1 111 +28 1 1 111 +280 2 1 111 +281 2 1 111 +282 2 1 111 +283 1 1 111 +284 1 1 111 +285 1 1 111 +286 1 1 111 +287 1 1 111 +288 2 1 111 +289 1 1 111 +291 1 1 111 +292 1 1 111 +296 1 1 111 +298 3 1 111 +30 1 1 111 +302 1 1 111 +305 1 1 111 +306 1 1 111 +307 2 1 111 +308 1 1 111 +309 2 1 111 +310 1 1 111 +311 3 1 111 +315 1 1 111 +316 3 1 111 +317 2 1 111 +318 3 1 111 +321 2 1 111 +322 2 1 111 +323 1 1 111 +325 2 1 111 +327 3 1 111 +33 1 1 111 +331 2 1 111 +332 1 1 111 +333 2 1 111 +335 1 1 111 +336 1 1 111 +338 1 1 111 +339 1 1 111 +34 1 1 111 +341 1 1 111 +342 2 1 111 +344 2 1 111 +345 1 1 111 +348 5 1 111 +35 3 1 111 +351 1 1 111 +353 2 1 111 +356 1 1 111 +360 1 1 111 +362 1 1 111 +364 1 1 111 +365 1 1 111 +366 1 1 111 +367 2 1 111 +368 1 1 111 +369 3 1 111 +37 2 1 111 +373 1 1 111 +374 1 1 111 +375 1 1 111 +377 1 1 111 +378 1 1 111 +379 1 1 111 +382 2 1 111 +384 3 1 111 +386 1 1 111 +389 1 1 111 +392 1 1 111 +393 1 1 111 +394 1 1 111 +395 2 1 111 +396 3 1 111 +397 2 1 111 +399 2 1 111 +4 1 1 111 +400 1 1 111 +401 5 1 111 +402 1 1 111 +403 3 1 111 +404 2 1 111 +406 4 1 111 +407 1 1 111 +409 3 1 111 +41 1 1 111 +411 1 1 111 +413 2 1 111 +414 2 1 111 +417 3 1 111 +418 1 1 111 +419 1 1 111 +42 2 1 111 +421 1 1 111 +424 2 1 111 +427 1 1 111 +429 2 1 111 +43 1 1 111 +430 3 1 111 +431 3 1 111 +432 1 1 111 +435 1 1 111 +436 1 1 111 +437 1 1 
111 +438 3 1 111 +439 2 1 111 +44 1 1 111 +443 1 1 111 +444 1 1 111 +446 1 1 111 +448 1 1 111 +449 1 1 111 +452 1 1 111 +453 1 1 111 +454 3 1 111 +455 1 1 111 +457 1 1 111 +458 2 1 111 +459 2 1 111 +460 1 1 111 +462 2 1 111 +463 2 1 111 +466 3 1 111 +467 1 1 111 +468 4 1 111 +469 5 1 111 +47 1 1 111 +470 1 1 111 +472 1 1 111 +475 1 1 111 +477 1 1 111 +478 2 1 111 +479 1 1 111 +480 3 1 111 +481 1 1 111 +482 1 1 111 +483 1 1 111 +484 1 1 111 +485 1 1 111 +487 1 1 111 +489 4 1 111 +490 1 1 111 +491 1 1 111 +492 2 1 111 +493 1 1 111 +494 1 1 111 +495 1 1 111 +496 1 1 111 +497 1 1 111 +498 3 1 111 +5 3 1 111 +51 2 1 111 +53 1 1 111 +54 1 1 111 +57 1 1 111 +58 2 1 111 +64 1 1 111 +65 1 1 111 +66 1 1 111 +67 2 1 111 +69 1 1 111 +70 3 1 111 +72 2 1 111 +74 1 1 111 +76 2 1 111 +77 1 1 111 +78 1 1 111 +8 1 1 111 +80 1 1 111 +82 1 1 111 +83 2 1 111 +84 2 1 111 +85 1 1 111 +86 1 1 111 +87 1 1 111 +9 1 1 111 +90 3 1 111 +92 1 1 111 +95 2 1 111 +96 1 1 111 +97 2 1 111 +98 2 1 111 diff --git a/ql/src/test/results/clientpositive/groupby12.q.out b/ql/src/test/results/clientpositive/groupby12.q.out new file mode 100644 index 0000000000..ded303637f --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby12.q.out @@ -0,0 +1,445 @@ +PREHOOK: query: CREATE TABLE dest1_n106(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n106 +POSTHOOK: query: CREATE TABLE dest1_n106(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n106 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n106 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n106 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n106 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@src +POSTHOOK: Output: default@dest1_n106 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col0), count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 316 Data size: 32548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col1) (type: int), CAST( _col2 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n106 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + 
compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n106 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1_n106 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n106 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n106 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n106 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: 
default@dest1_n106 +POSTHOOK: Lineage: dest1_n106.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n106.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n106.* FROM dest1_n106 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n106 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n106.* FROM dest1_n106 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n106 +#### A masked pattern was here #### +3 1 +1 1 +2 1 +2 1 +2 1 +1 1 +1 1 +1 1 +2 1 +1 1 +1 1 +2 1 +3 1 +2 1 +2 1 +2 1 +1 1 +3 1 +2 1 +1 1 +1 1 +2 1 +1 1 +2 1 +4 1 +1 1 +1 1 +2 1 +2 1 +2 1 +1 1 +2 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +2 1 +2 1 +1 1 +3 1 +1 1 +4 1 +1 1 +1 1 +2 1 +2 1 +2 1 +2 1 +1 1 +1 1 +2 1 +2 1 +1 1 +1 1 +1 1 +1 1 +3 1 +1 1 +1 1 +1 1 +2 1 +1 1 +3 1 +1 1 +2 1 +1 1 +2 1 +3 1 +1 1 +1 1 +2 1 +1 1 +1 1 +2 1 +2 1 +2 1 +3 1 +2 1 +2 1 +1 1 +2 1 +2 1 +1 1 +2 1 +2 1 +1 1 +2 1 +2 1 +1 1 +1 1 +2 1 +5 1 +2 1 +1 1 +2 1 +2 1 +2 1 +2 1 +1 1 +2 1 +1 1 +1 1 +1 1 +1 1 +1 1 +2 1 +2 1 +1 1 +1 1 +2 1 +1 1 +1 1 +1 1 +2 1 +1 1 +1 1 +2 1 +3 1 +1 1 +1 1 +4 1 +2 1 +1 1 +2 1 +2 1 +2 1 +1 1 +1 1 +1 1 +1 1 +1 1 +2 1 +1 1 +1 1 +1 1 +1 1 +3 1 +1 1 +1 1 +1 1 +1 1 +2 1 +1 1 +2 1 +1 1 +3 1 +1 1 +3 1 +2 1 +3 1 +2 1 +2 1 +1 1 +2 1 +3 1 +1 1 +2 1 +1 1 +2 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +2 1 +2 1 +1 1 +5 1 +3 1 +1 1 +2 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +2 1 +1 1 +3 1 +2 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +2 1 +3 1 +1 1 +1 1 +1 1 +1 1 +1 1 +2 1 +3 1 +2 1 +2 1 +1 1 +1 1 +5 1 +1 1 +3 1 +2 1 +4 1 +1 1 +3 1 +1 1 +1 1 +2 1 +2 1 +3 1 +1 1 +1 1 +2 1 +1 1 +2 1 +1 1 +2 1 +1 1 +3 1 +3 1 +1 1 +1 1 +1 1 +1 1 +3 1 +2 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 1 +1 1 +1 1 +2 1 +2 1 +1 1 +2 1 +2 1 +3 1 +1 1 +4 1 +5 1 +1 1 +1 1 +1 1 +1 1 +1 1 +2 1 +1 1 +3 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +4 1 +1 1 +1 1 +2 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 1 +3 1 +2 1 +1 1 +1 1 +1 1 +2 1 +1 1 +1 1 +1 1 +2 1 +1 1 +3 1 +2 1 +1 1 
+2 1 +1 1 +1 1 +1 1 +1 1 +1 1 +2 1 +2 1 +1 1 +1 1 +1 1 +1 1 +3 1 +1 1 +2 1 +1 1 +2 1 +2 1 diff --git a/ql/src/test/results/clientpositive/llap/groupby13.q.out b/ql/src/test/results/clientpositive/groupby13.q.out similarity index 55% rename from ql/src/test/results/clientpositive/llap/groupby13.q.out rename to ql/src/test/results/clientpositive/groupby13.q.out index 8c9b590b2e..a32a654afa 100644 --- a/ql/src/test/results/clientpositive/llap/groupby13.q.out +++ b/ql/src/test/results/clientpositive/groupby13.q.out @@ -42,57 +42,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: grpby_test - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: int_col_7 (type: int), int_col_7 (type: int), least(CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (-279) END,CASE WHEN (int_col_7 is not null) THEN (int_col_7) ELSE (476) END) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col2) - keys: _col0 (type: int), _col1 (type: int), _col2 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator 
Tree: + TableScan + alias: grpby_test + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: int_col_7 (type: int), int_col_7 (type: int), least(CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (-279) END,CASE WHEN (int_col_7 is not null) THEN (int_col_7) ELSE (476) END) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) - mode: mergepartial + aggregations: max(_col2) + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col3 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: _col1 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/groupby1_limit.q.out b/ql/src/test/results/clientpositive/groupby1_limit.q.out new file mode 100644 index 0000000000..03bd541de2 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby1_limit.q.out @@ -0,0 +1,136 @@ +PREHOOK: query: CREATE TABLE dest1_n125(key INT, value DOUBLE) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n125 +POSTHOOK: query: CREATE TABLE dest1_n125(key INT, value DOUBLE) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n125 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1_n125 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n125 +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1_n125 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n125 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + 
expressions: key (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data 
size: 475 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n125 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n125 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n125 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key ORDER BY src.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n125 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n125 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key ORDER BY src.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n125 +POSTHOOK: Lineage: dest1_n125.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n125.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n125.* FROM dest1_n125 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n125 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n125.* FROM dest1_n125 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n125 +#### A masked pattern was here #### +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 
208.0 diff --git a/ql/src/test/results/clientpositive/llap/groupby1_map.q.out b/ql/src/test/results/clientpositive/groupby1_map.q.out similarity index 58% rename from ql/src/test/results/clientpositive/llap/groupby1_map.q.out rename to ql/src/test/results/clientpositive/groupby1_map.q.out index 77d547ff41..81459ac6b6 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1_map.q.out +++ b/ql/src/test/results/clientpositive/groupby1_map.q.out @@ -18,98 +18,70 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n61 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator 
Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial + aggregations: sum(_col1) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n61 - Select Operator - expressions: _col0 (type: int), _col1 (type: double) - outputColumnNames: key, value - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: + Reduce Output Operator + 
key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n61 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -121,7 +93,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n61 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -129,6 +101,30 @@ STAGE PLANS: Column Types: int, double Table: default.dest1_n61 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n61 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/llap/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out similarity index 58% rename from ql/src/test/results/clientpositive/llap/groupby1_map_nomap.q.out rename to ql/src/test/results/clientpositive/groupby1_map_nomap.q.out index 3a42667d2d..2a40ed5f39 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1_map_nomap.q.out +++ 
b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out @@ -18,98 +18,70 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n67 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - 
aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial + aggregations: sum(_col1) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n67 - Select Operator - expressions: _col0 (type: int), _col1 (type: double) - outputColumnNames: key, value - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: 
mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n67 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -121,7 +93,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n67 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -129,6 
+101,30 @@ STAGE PLANS: Column Types: int, double Table: default.dest1_n67 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n67 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/llap/groupby1_map_skew.q.out b/ql/src/test/results/clientpositive/groupby1_map_skew.q.out similarity index 57% rename from ql/src/test/results/clientpositive/llap/groupby1_map_skew.q.out rename to ql/src/test/results/clientpositive/groupby1_map_skew.q.out index 367d27e32b..da4f502178 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby1_map_skew.q.out @@ -20,113 +20,95 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- 
Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: partials + aggregations: sum(_col1) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: rand() (type: double) 
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n175 - Select Operator - expressions: _col0 (type: int), _col1 (type: double) - outputColumnNames: key, value - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: 
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n175 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: final + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE 
Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -146,6 +128,30 @@ STAGE PLANS: Column Types: int, double Table: default.dest1_n175 + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n175 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/llap/groupby1_noskew.q.out b/ql/src/test/results/clientpositive/groupby1_noskew.q.out similarity index 58% rename from ql/src/test/results/clientpositive/llap/groupby1_noskew.q.out rename to ql/src/test/results/clientpositive/groupby1_noskew.q.out index da2ba5a2b6..1745df03b1 100644 --- 
a/ql/src/test/results/clientpositive/llap/groupby1_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby1_noskew.q.out @@ -18,85 +18,58 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_g1_n0 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE 
Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g1_n0 - Select Operator - expressions: _col0 (type: int), _col1 (type: double) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: double) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: 
vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g1_n0 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -108,7 +81,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g1_n0 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -116,6 +89,30 @@ STAGE PLANS: Column Types: int, double Table: default.dest_g1_n0 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1_n0 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby2_limit.q.out b/ql/src/test/results/clientpositive/groupby2_limit.q.out new file mode 100644 index 0000000000..1579903938 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby2_limit.q.out @@ -0,0 +1,102 @@ +PREHOOK: query: EXPLAIN +SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 diff --git a/ql/src/test/results/clientpositive/groupby2_map.q.out b/ql/src/test/results/clientpositive/groupby2_map.q.out new file mode 100644 index 0000000000..85bebf0000 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby2_map.q.out @@ -0,0 +1,160 @@ +PREHOOK: query: CREATE TABLE dest1_n16(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n16 +POSTHOOK: query: CREATE TABLE dest1_n16(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n16 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n16 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n16 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n16 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n16 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 
+ +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT _col1), sum(_col1) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n16 + Select Operator + expressions: _col0 (type: string), _col1 (type: 
int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n16 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1_n16 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n16 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n16 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n16 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n16 +POSTHOOK: Lineage: dest1_n16.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n16.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n16.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n16.* FROM dest1_n16 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n16 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n16.* FROM dest1_n16 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n16 +#### A masked pattern was here #### +0 1 00.0 +1 71 116414.0 +2 69 225571.0 +3 62 332004.0 +4 74 452763.0 +5 6 5397.0 +6 5 6398.0 +7 6 7735.0 +8 8 8762.0 +9 7 91047.0 diff --git a/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out new file mode 100644 index 0000000000..d49e7de3b7 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out @@ -0,0 +1,316 @@ +PREHOOK: query: CREATE TABLE dest1_n38(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@dest1_n38 +POSTHOOK: query: CREATE TABLE dest1_n38(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n38 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n38 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n38 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT _col1), sum(_col1), sum(DISTINCT _col1), count(_col2) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), 
_col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: double), _col5 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n38 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n38 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1_n38 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY 
substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n38 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n38 +POSTHOOK: Lineage: dest1_n38.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n38.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n38.c3 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1_n38.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n38.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n38.* FROM dest1_n38 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n38 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n38.* FROM dest1_n38 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n38 +#### A masked pattern was here #### +0 1 00.0 0 3 +1 71 116414.0 10044 115 +2 69 225571.0 15780 111 +3 62 332004.0 20119 99 +4 74 452763.0 30965 124 +5 6 5397.0 278 10 +6 5 6398.0 331 6 +7 6 7735.0 447 10 +8 8 8762.0 595 10 +9 7 91047.0 577 12 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n38 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT 
substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n38 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT _col0), sum(_col1), sum(DISTINCT _col1), count(_col2) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: double), _col5 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, 
_col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n38 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n38 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1_n38 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + 
Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n38 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n38 +POSTHOOK: Lineage: dest1_n38.c1 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1_n38.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n38.c3 EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n38.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n38.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n38.* FROM dest1_n38 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n38 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n38.* FROM dest1_n38 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n38 +#### A masked pattern was here #### +0 1 00.0 0 3 +1 1 116414.0 10044 115 +2 1 225571.0 15780 111 +3 1 332004.0 20119 99 +4 1 452763.0 30965 124 +5 1 5397.0 278 10 +6 1 6398.0 331 6 +7 1 7735.0 447 10 +8 1 8762.0 595 10 +9 1 91047.0 577 12 diff --git a/ql/src/test/results/clientpositive/groupby2_map_skew.q.out b/ql/src/test/results/clientpositive/groupby2_map_skew.q.out new file mode 100644 index 0000000000..9783f9e10f --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby2_map_skew.q.out @@ -0,0 +1,187 @@ +PREHOOK: query: CREATE TABLE dest1_n10(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n10 +POSTHOOK: query: CREATE TABLE dest1_n10(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n10 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n10 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n10 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n10 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n10 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT _col1), sum(_col1) + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort 
order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n10 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n10 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1_n10 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n10 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n10 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n10 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n10 +POSTHOOK: Lineage: dest1_n10.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: dest1_n10.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n10.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n10.* FROM dest1_n10 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n10 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n10.* FROM dest1_n10 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n10 +#### A masked pattern was here #### +0 1 00.0 +1 71 116414.0 +2 69 225571.0 +3 62 332004.0 +4 74 452763.0 +5 6 5397.0 +6 5 6398.0 +7 6 7735.0 +8 8 8762.0 +9 7 91047.0 diff --git a/ql/src/test/results/clientpositive/groupby2_noskew.q.out b/ql/src/test/results/clientpositive/groupby2_noskew.q.out new file mode 100644 index 0000000000..5dd501fbe1 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby2_noskew.q.out @@ -0,0 +1,147 @@ +PREHOOK: query: CREATE TABLE dest_g2_n1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g2_n1 +POSTHOOK: query: CREATE TABLE dest_g2_n1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g2_n1 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2_n1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_g2_n1 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2_n1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: 
Output: default@dest_g2_n1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 316 Data size: 31916 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2_n1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 316 Data size: 86268 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2_n1 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2_n1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2_n1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: 
default@dest_g2_n1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2_n1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_g2_n1 +POSTHOOK: Lineage: dest_g2_n1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2_n1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2_n1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest_g2_n1.* FROM dest_g2_n1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g2_n1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest_g2_n1.* FROM dest_g2_n1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g2_n1 +#### A masked pattern was here #### +0 1 00.0 +1 71 116414.0 +2 69 225571.0 +3 62 332004.0 +4 74 452763.0 +5 6 5397.0 +6 5 6398.0 +7 6 7735.0 +8 8 8762.0 +9 7 91047.0 diff --git a/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out new file mode 100644 index 0000000000..4d8a5aa02b --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out @@ -0,0 +1,150 @@ +PREHOOK: query: CREATE TABLE dest_g2_n3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g2_n3 +POSTHOOK: query: CREATE TABLE dest_g2_n3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g2_n3 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2_n3 SELECT substr(src.key,1,1), count(DISTINCT 
substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_g2_n3 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2_n3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_g2_n3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: 
int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2_n3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2_n3 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2_n3 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2_n3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_g2_n3 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2_n3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_g2_n3 +POSTHOOK: Lineage: dest_g2_n3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2_n3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2_n3.c3 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g2_n3.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2_n3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT 
dest_g2_n3.* FROM dest_g2_n3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g2_n3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest_g2_n3.* FROM dest_g2_n3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g2_n3 +#### A masked pattern was here #### +0 1 00.0 0 3 +1 71 116414.0 10044 115 +2 69 225571.0 15780 111 +3 62 332004.0 20119 99 +4 74 452763.0 30965 124 +5 6 5397.0 278 10 +6 5 6398.0 331 6 +7 6 7735.0 447 10 +8 8 8762.0 595 10 +9 7 91047.0 577 12 diff --git a/ql/src/test/results/clientpositive/groupby3_map.q.out b/ql/src/test/results/clientpositive/groupby3_map.q.out new file mode 100644 index 0000000000..22573cb77a --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby3_map.q.out @@ -0,0 +1,190 @@ +PREHOOK: query: CREATE TABLE dest1_n53(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n53 +POSTHOOK: query: CREATE TABLE dest1_n53(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n53 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n53 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n53 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n53 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + 
variance(substr(src.value,5)), + var_samp(substr(src.value,5)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n53 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), count(_col0), sum(DISTINCT _col0), count(DISTINCT _col0), max(_col0), min(_col0), sum(_col2), sum(_col1) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(VALUE._col4), min(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double), 
(_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n53 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: 
struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n53 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1_n53 + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n53 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n53 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n53 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n53 
+POSTHOOK: Lineage: dest1_n53.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n53.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n53.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: dest1_n53.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n53.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n53.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n53.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n53.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n53.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT +c1, +c2, +round(c3, 11) c3, +c4, +c5, +round(c6, 11) c6, +round(c7, 11) c7, +round(c8, 5) c8, +round(c9, 9) c9 +FROM dest1_n53 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n53 +#### A masked pattern was here #### +POSTHOOK: query: SELECT +c1, +c2, +round(c3, 11) c3, +c4, +c5, +round(c6, 11) c6, +round(c7, 11) c7, +round(c8, 5) c8, +round(c9, 9) c9 +FROM dest1_n53 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n53 +#### A masked pattern was here #### +130091.0 260.182 256.10355987055 98.0 0.0 142.92680950752 143.06995106519 20428.07288 20469.010897796 diff --git a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out new file mode 100644 index 0000000000..fbd6e4e894 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out @@ -0,0 +1,180 @@ +PREHOOK: query: CREATE TABLE dest1_n68(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 
DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n68 +POSTHOOK: query: CREATE TABLE dest1_n68(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n68 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n68 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)), + sum(DISTINCT substr(src.value, 5)), + count(DISTINCT substr(src.value, 5)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n68 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n68 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)), + sum(DISTINCT substr(src.value, 5)), + count(DISTINCT substr(src.value, 5)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n68 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * 
UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), count(_col0), sum(DISTINCT _col0), count(DISTINCT _col0), max(_col0), min(_col0), sum(_col2), sum(_col1) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(VALUE._col4), min(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double) + outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n68 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 4664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 4664 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4664 
Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n68 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1_n68 + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n68 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)), + sum(DISTINCT substr(src.value, 5)), + count(DISTINCT substr(src.value, 5)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n68 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n68 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)), + sum(DISTINCT substr(src.value, 5)), + count(DISTINCT substr(src.value, 5)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n68 +POSTHOOK: Lineage: dest1_n68.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n68.c10 EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n68.c11 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1_n68.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n68.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: dest1_n68.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n68.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n68.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n68.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n68.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n68.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n68.* FROM dest1_n68 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n68 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n68.* FROM dest1_n68 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n68 +#### A masked pattern was here #### +130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593 79136.0 309.0 diff --git a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out new file mode 100644 index 0000000000..c6cccd1c60 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out @@ -0,0 +1,197 @@ +PREHOOK: query: CREATE TABLE dest1_n131(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n131 
+POSTHOOK: query: CREATE TABLE dest1_n131(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n131 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n131 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n131 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n131 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n131 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5)) + keys: substr(value, 5) (type: string) + minReductionHashAggr: 
0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 250 Data size: 323500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 323500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), 
variance(VALUE._col7), var_samp(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n131 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: 
struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n131 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1_n131 + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n131 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n131 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n131 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n131 +POSTHOOK: 
Lineage: dest1_n131.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n131.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n131.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n131.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n131.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n131.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n131.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n131.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n131.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT ROUND(c1, 1), ROUND(c2, 3), ROUND(c3, 5), ROUND(c4, 1), ROUND(c5, 1), ROUND(c6, 5), +ROUND(c7,5), ROUND(c8, 5), ROUND(c9, 5) FROM dest1_n131 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n131 +#### A masked pattern was here #### +POSTHOOK: query: SELECT ROUND(c1, 1), ROUND(c2, 3), ROUND(c3, 5), ROUND(c4, 1), ROUND(c5, 1), ROUND(c6, 5), +ROUND(c7,5), ROUND(c8, 5), ROUND(c9, 5) FROM dest1_n131 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n131 +#### A masked pattern was here #### +130091.0 260.182 256.10356 98.0 0.0 142.92681 143.06995 20428.07288 20469.0109 diff --git a/ql/src/test/results/clientpositive/llap/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/groupby3_noskew.q.out similarity index 50% rename from ql/src/test/results/clientpositive/llap/groupby3_noskew.q.out rename to ql/src/test/results/clientpositive/groupby3_noskew.q.out index 0f3e3c1fc1..c62c01db6a 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3_noskew.q.out 
+++ b/ql/src/test/results/clientpositive/groupby3_noskew.q.out @@ -38,74 +38,61 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n63 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: double), _col1 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 500 Data size: 45500 
Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: double), _col1 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n63 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 72 
Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1) + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n63 - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: 
double), _col8 (type: double) - outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -117,7 +104,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n63 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/llap/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out similarity index 52% rename from 
ql/src/test/results/clientpositive/llap/groupby3_noskew_multi_distinct.q.out rename to ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out index 097dfb9c25..4f6ce09725 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out @@ -42,74 +42,61 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n24 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: double), _col1 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) 
(type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: double), _col1 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n24 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1) + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 88 Basic stats: 
COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n24 - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) - outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4840 Basic 
stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -121,7 +108,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n24 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/groupby4.q.out b/ql/src/test/results/clientpositive/groupby4.q.out new file mode 100644 index 0000000000..d861e47207 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby4.q.out @@ -0,0 +1,190 @@ +PREHOOK: query: CREATE TABLE dest1_n168(c1 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n168 +POSTHOOK: query: CREATE TABLE dest1_n168(c1 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n168 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n168 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n168 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n168 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n168 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(key, 1, 1) (type: string) + 
outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n168 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n168 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1_n168 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num 
rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n168 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n168 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n168 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n168 +POSTHOOK: Lineage: dest1_n168.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n168.* FROM dest1_n168 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n168 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n168.* FROM dest1_n168 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n168 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/ql/src/test/results/clientpositive/groupby4_map.q.out b/ql/src/test/results/clientpositive/groupby4_map.q.out new file mode 100644 index 0000000000..73d550f8d6 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby4_map.q.out @@ -0,0 +1,119 @@ +PREHOOK: query: CREATE TABLE dest1_n40(key INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n40 +POSTHOOK: query: CREATE TABLE dest1_n40(key INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n40 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1_n40 SELECT count(1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n40 +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1_n40 SELECT count(1) 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n40 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n40 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: 
COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n40 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1_n40 + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n40 SELECT count(1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n40 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n40 SELECT count(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n40 +POSTHOOK: Lineage: dest1_n40.key EXPRESSION [(src)src.null, ] +PREHOOK: query: SELECT dest1_n40.* FROM dest1_n40 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n40 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n40.* FROM dest1_n40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n40 +#### A masked pattern was here #### +500 diff --git a/ql/src/test/results/clientpositive/groupby4_map_skew.q.out b/ql/src/test/results/clientpositive/groupby4_map_skew.q.out new file mode 100644 index 0000000000..c1ad5b3a7f --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby4_map_skew.q.out @@ -0,0 +1,119 @@ +PREHOOK: query: CREATE TABLE dest1_n141(key INT) STORED AS TEXTFILE +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n141 +POSTHOOK: query: CREATE TABLE dest1_n141(key INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n141 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1_n141 SELECT count(1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n141 +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1_n141 SELECT count(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n141 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat 
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n141 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n141 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1_n141 + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n141 SELECT count(1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n141 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n141 SELECT count(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n141 +POSTHOOK: Lineage: dest1_n141.key EXPRESSION [(src)src.null, ] +PREHOOK: query: SELECT dest1_n141.* FROM dest1_n141 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n141 +#### A masked pattern was here #### 
+POSTHOOK: query: SELECT dest1_n141.* FROM dest1_n141 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n141 +#### A masked pattern was here #### +500 diff --git a/ql/src/test/results/clientpositive/groupby4_noskew.q.out b/ql/src/test/results/clientpositive/groupby4_noskew.q.out new file mode 100644 index 0000000000..842bda37c7 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby4_noskew.q.out @@ -0,0 +1,140 @@ +PREHOOK: query: CREATE TABLE dest1_n33(c1 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n33 +POSTHOOK: query: CREATE TABLE dest1_n33(c1 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n33 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n33 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n33 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n33 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n33 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(key, 1, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: 
vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n33 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n33 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1_n33 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: 
Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n33 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n33 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n33 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n33 +POSTHOOK: Lineage: dest1_n33.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n33.* FROM dest1_n33 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n33 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n33.* FROM dest1_n33 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n33 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/ql/src/test/results/clientpositive/groupby5.q.out b/ql/src/test/results/clientpositive/groupby5.q.out new file mode 100644 index 0000000000..2eb244bad0 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby5.q.out @@ -0,0 +1,506 @@ +PREHOOK: query: CREATE TABLE dest1_n36(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n36 +POSTHOOK: query: CREATE TABLE dest1_n36(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n36 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest1_n36 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n36 +POSTHOOK: query: EXPLAIN +INSERT 
OVERWRITE TABLE dest1_n36 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n36 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + 
Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n36 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n36 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1_n36 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE 
Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: INSERT OVERWRITE TABLE dest1_n36 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n36 +POSTHOOK: query: INSERT OVERWRITE TABLE dest1_n36 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n36 +POSTHOOK: Lineage: dest1_n36.key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n36.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n36.* FROM dest1_n36 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n36 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n36.* FROM dest1_n36 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n36 +#### A masked pattern was here #### +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 
30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 diff --git a/ql/src/test/results/clientpositive/groupby5_map.q.out b/ql/src/test/results/clientpositive/groupby5_map.q.out new file mode 100644 index 0000000000..c6cd69de5b --- /dev/null +++ 
b/ql/src/test/results/clientpositive/groupby5_map.q.out @@ -0,0 +1,120 @@ +PREHOOK: query: CREATE TABLE dest1_n75(key INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n75 +POSTHOOK: query: CREATE TABLE dest1_n75(key INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n75 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1_n75 SELECT sum(src.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n75 +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1_n75 SELECT sum(src.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n75 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(key) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: 
COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n75 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n75 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1_n75 + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n75 SELECT sum(src.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n75 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n75 SELECT sum(src.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n75 +POSTHOOK: 
Lineage: dest1_n75.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n75.* FROM dest1_n75 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n75 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n75.* FROM dest1_n75 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n75 +#### A masked pattern was here #### +130091 diff --git a/ql/src/test/results/clientpositive/groupby5_map_skew.q.out b/ql/src/test/results/clientpositive/groupby5_map_skew.q.out new file mode 100644 index 0000000000..f8c4ee77ca --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby5_map_skew.q.out @@ -0,0 +1,120 @@ +PREHOOK: query: CREATE TABLE dest1_n76(key INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n76 +POSTHOOK: query: CREATE TABLE dest1_n76(key INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n76 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1_n76 SELECT sum(src.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n76 +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1_n76 SELECT sum(src.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n76 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(key) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n76 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n76 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1_n76 + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n76 SELECT sum(src.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n76 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n76 SELECT sum(src.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n76 +POSTHOOK: Lineage: dest1_n76.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n76.* FROM dest1_n76 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n76 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n76.* FROM dest1_n76 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n76 +#### A masked pattern was here #### +130091 diff --git a/ql/src/test/results/clientpositive/llap/groupby5_noskew.q.out b/ql/src/test/results/clientpositive/groupby5_noskew.q.out similarity index 58% rename from ql/src/test/results/clientpositive/llap/groupby5_noskew.q.out rename to ql/src/test/results/clientpositive/groupby5_noskew.q.out index fe2dbffa5a..2363dfed27 100644 --- a/ql/src/test/results/clientpositive/llap/groupby5_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby5_noskew.q.out @@ -24,85 +24,58 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n31 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 
(CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n31 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value 
expressions: key (type: int), value (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE 
Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n31 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -114,7 +87,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n31 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -122,6 +95,30 @@ STAGE PLANS: Column Types: int, string Table: default.dest1_n31 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest1_n31 SELECT src.key, 
sum(substr(src.value,5)) FROM src diff --git a/ql/src/test/results/clientpositive/groupby6.q.out b/ql/src/test/results/clientpositive/groupby6.q.out new file mode 100644 index 0000000000..a72afb26b8 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby6.q.out @@ -0,0 +1,190 @@ +PREHOOK: query: CREATE TABLE dest1_n92(c1 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n92 +POSTHOOK: query: CREATE TABLE dest1_n92(c1 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n92 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n92 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n92 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n92 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n92 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(value, 5, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: partial1 + 
outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n92 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n92 + + 
Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1_n92 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n92 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n92 +POSTHOOK: query: FROM src 
+INSERT OVERWRITE TABLE dest1_n92 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n92 +POSTHOOK: Lineage: dest1_n92.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n92.* FROM dest1_n92 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n92 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n92.* FROM dest1_n92 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n92 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/ql/src/test/results/clientpositive/groupby6_map.q.out b/ql/src/test/results/clientpositive/groupby6_map.q.out new file mode 100644 index 0000000000..9f8c791d25 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby6_map.q.out @@ -0,0 +1,152 @@ +PREHOOK: query: CREATE TABLE dest1_n19(c1 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n19 +POSTHOOK: query: CREATE TABLE dest1_n19(c1 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n19 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n19 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n19 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n19 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n19 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(value, 5, 1) 
(type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n19 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n19 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1_n19 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n19 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n19 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n19 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n19 +POSTHOOK: Lineage: dest1_n19.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n19.* FROM dest1_n19 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n19 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n19.* FROM dest1_n19 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n19 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git 
a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out new file mode 100644 index 0000000000..43fbc841b1 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: CREATE TABLE dest1_n98(c1 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n98 +POSTHOOK: query: CREATE TABLE dest1_n98(c1 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n98 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n98 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n98 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n98 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n98 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(value, 5, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 21250 Basic stats: 
COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n98 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n98 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1_n98 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n98 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n98 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n98 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n98 +POSTHOOK: Lineage: dest1_n98.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n98.* FROM dest1_n98 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n98 +#### A masked pattern 
was here #### +POSTHOOK: query: SELECT dest1_n98.* FROM dest1_n98 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n98 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/ql/src/test/results/clientpositive/groupby6_noskew.q.out b/ql/src/test/results/clientpositive/groupby6_noskew.q.out new file mode 100644 index 0000000000..9545ca3873 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby6_noskew.q.out @@ -0,0 +1,140 @@ +PREHOOK: query: CREATE TABLE dest1_n100(c1 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n100 +POSTHOOK: query: CREATE TABLE dest1_n100(c1 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1_n100 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n100 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n100 +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1_n100 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n100 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: substr(value, 5, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Execution 
mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n100 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n100 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1_n100 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n100 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n100 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1_n100 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n100 +POSTHOOK: Lineage: dest1_n100.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1_n100.* FROM dest1_n100 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n100 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1_n100.* FROM dest1_n100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n100 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/ql/src/test/results/clientpositive/llap/groupby7_map.q.out b/ql/src/test/results/clientpositive/groupby7_map.q.out similarity index 58% rename from ql/src/test/results/clientpositive/llap/groupby7_map.q.out rename to ql/src/test/results/clientpositive/groupby7_map.q.out index a37e6cf957..08b0db51e1 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_map.q.out +++ b/ql/src/test/results/clientpositive/groupby7_map.q.out @@ -32,171 +32,91 @@ POSTHOOK: Output: default@dest1_n82 POSTHOOK: Output: default@dest2_n19 STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 + Stage-4 depends on 
stages: Stage-2 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 
Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial + aggregations: sum(substr(value, 5)) + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n82 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 
864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial + aggregations: sum(substr(value, 5)) + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column 
stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n82 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n19 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort 
order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -208,7 +128,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n82 - Stage: Stage-4 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -216,6 +136,86 @@ STAGE PLANS: Column Types: int, string Table: default.dest1_n82 + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2_n19 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n19 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: 
COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Move Operator tables: @@ -226,13 +226,29 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2_n19 - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest2_n19 + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1_n82 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/llap/groupby7_map_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out similarity index 58% rename from ql/src/test/results/clientpositive/llap/groupby7_map_multi_single_reducer.q.out rename to ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out index f2cd96aa1a..e17ef4e8da 100644 --- 
a/ql/src/test/results/clientpositive/llap/groupby7_map_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out @@ -32,144 +32,103 @@ POSTHOOK: Output: default@dest1_n15 POSTHOOK: Output: default@dest2_n3 STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 + Stage-1 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: substr(value, 5) (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Forward + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + 
outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: substr(value, 5) (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n15 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) 
(type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n15 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + 
Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n3 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: 
COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -181,7 +140,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n15 - Stage: Stage-4 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -189,6 +148,38 @@ STAGE PLANS: 
Column Types: int, string Table: default.dest1_n15 + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2_n3 + Stage: Stage-1 Move Operator tables: @@ -199,13 +190,29 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2_n3 - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest2_n3 + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + 
Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1_n15 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/llap/groupby7_map_skew.q.out b/ql/src/test/results/clientpositive/groupby7_map_skew.q.out similarity index 54% rename from ql/src/test/results/clientpositive/llap/groupby7_map_skew.q.out rename to ql/src/test/results/clientpositive/groupby7_map_skew.q.out index 781547ed0f..4a06aa7830 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby7_map_skew.q.out @@ -34,203 +34,117 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-8 depends on stages: Stage-1, Stage-5, Stage-9 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value 
(type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: partials + aggregations: 
sum(substr(value, 5)) + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final + aggregations: sum(substr(value, 5)) + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n21 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - 
minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: partials - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: 
Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n5 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic 
stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n21 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: final + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe 
Stage: Stage-0 Move Operator @@ -250,6 +164,112 @@ STAGE PLANS: Column Types: int, string Table: default.dest1_n21 + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-8 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2_n5 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Move 
Operator tables: @@ -260,13 +280,29 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2_n5 - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest2_n5 + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1_n21 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/llap/groupby7_noskew.q.out b/ql/src/test/results/clientpositive/groupby7_noskew.q.out similarity index 58% rename from ql/src/test/results/clientpositive/llap/groupby7_noskew.q.out rename to ql/src/test/results/clientpositive/groupby7_noskew.q.out index 8346f49a2d..d60d67e0a8 100644 --- a/ql/src/test/results/clientpositive/llap/groupby7_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby7_noskew.q.out @@ -32,145 +32,72 @@ POSTHOOK: Output: default@dest1_n101 POSTHOOK: Output: default@dest2_n28 STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - 
Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 + Stage-4 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: substr(value, 5) (type: string) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: substr(value, 5) (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete 
- outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n101 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic 
stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n28 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data 
size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: substr(value, 5) (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n101 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -182,7 +109,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n101 - Stage: Stage-4 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -190,6 +117,80 @@ STAGE PLANS: Column Types: int, string Table: default.dest1_n101 + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2_n28 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: substr(value, 5) (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + 
mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n28 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Move Operator tables: @@ -200,13 +201,29 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2_n28 - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest2_n28 + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 
880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1_n101 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out new file mode 100644 index 0000000000..45a2afcebc --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out @@ -0,0 +1,314 @@ +PREHOOK: query: CREATE TABLE DEST1_n170(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1_n170 +POSTHOOK: query: CREATE TABLE DEST1_n170(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1_n170 +PREHOOK: query: CREATE TABLE DEST2_n42(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2_n42 +POSTHOOK: query: CREATE TABLE DEST2_n42(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2_n42 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1_n170 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 +INSERT OVERWRITE TABLE DEST2_n42 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n170 +PREHOOK: Output: default@dest2_n42 +POSTHOOK: query: EXPLAIN 
+FROM SRC +INSERT OVERWRITE TABLE DEST1_n170 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 +INSERT OVERWRITE TABLE DEST2_n42 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n170 +POSTHOOK: Output: default@dest2_n42 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-8 + Stage-5 depends on stages: Stage-3 + Stage-7 depends on stages: Stage-1, Stage-5, Stage-8 + Stage-6 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: substr(value, 5) (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n170 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 1880 
Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n170 + + Stage: Stage-4 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1_n170 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2_n42 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 316 Data 
size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n42 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n42 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 10 Data 
size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1_n170 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 +INSERT OVERWRITE TABLE DEST2_n42 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n170 +PREHOOK: Output: default@dest2_n42 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1_n170 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 +INSERT OVERWRITE TABLE DEST2_n42 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n170 +POSTHOOK: Output: default@dest2_n42 +POSTHOOK: Lineage: dest1_n170.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n170.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2_n42.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2_n42.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: 
SELECT DEST1_n170.* FROM DEST1_n170 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n170 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1_n170.* FROM DEST1_n170 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n170 +#### A masked pattern was here #### +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +PREHOOK: query: SELECT DEST2_n42.* FROM DEST2_n42 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2_n42 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2_n42.* FROM DEST2_n42 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2_n42 +#### A masked pattern was here #### +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 diff --git a/ql/src/test/results/clientpositive/groupby8.q.out b/ql/src/test/results/clientpositive/groupby8.q.out new file mode 100644 index 0000000000..59a2334014 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby8.q.out @@ -0,0 +1,1946 @@ +PREHOOK: query: CREATE TABLE DEST1_n71(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1_n71 +POSTHOOK: query: CREATE TABLE DEST1_n71(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1_n71 +PREHOOK: query: CREATE TABLE DEST2_n15(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2_n15 +POSTHOOK: query: CREATE TABLE DEST2_n15(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2_n15 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY 
SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n71 +PREHOOK: Output: default@dest2_n15 +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n71 +POSTHOOK: Output: default@dest2_n15 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0, Stage-6, Stage-11 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-5 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-11 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string), substr(value, 5) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n71 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, 
value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n71 + + Stage: Stage-4 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1_n71 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + 
Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2_n15 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), substr(value, 5) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce 
Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n15 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n15 + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n71 +PREHOOK: Output: default@dest2_n15 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src 
+POSTHOOK: Output: default@dest1_n71 +POSTHOOK: Output: default@dest2_n15 +POSTHOOK: Lineage: dest1_n71.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n71.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2_n15.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2_n15.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1_n71.* FROM DEST1_n71 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n71 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1_n71.* FROM DEST1_n71 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n71 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 
+348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2_n15.* FROM DEST2_n15 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2_n15 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2_n15.* FROM DEST2_n15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2_n15 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 
+262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n71 +PREHOOK: Output: default@dest2_n15 +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: 
default@dest1_n71 +POSTHOOK: Output: default@dest2_n15 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0, Stage-6, Stage-11 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-5 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-11 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string), substr(value, 5) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n71 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n71 + + Stage: Stage-4 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1_n71 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2_n15 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), substr(value, 5) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n15 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n15 + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n71 +PREHOOK: Output: default@dest2_n15 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n71 +POSTHOOK: Output: default@dest2_n15 +POSTHOOK: Lineage: dest1_n71.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n71.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2_n15.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
+POSTHOOK: Lineage: dest2_n15.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1_n71.* FROM DEST1_n71 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n71 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1_n71.* FROM DEST1_n71 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n71 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 
+438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2_n15.* FROM DEST2_n15 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2_n15 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2_n15.* FROM DEST2_n15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2_n15 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 
+351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 diff --git a/ql/src/test/results/clientpositive/llap/groupby8_map.q.out b/ql/src/test/results/clientpositive/groupby8_map.q.out similarity index 52% rename from ql/src/test/results/clientpositive/llap/groupby8_map.q.out rename to ql/src/test/results/clientpositive/groupby8_map.q.out index 7a62ab1963..c004bcaf13 100644 --- a/ql/src/test/results/clientpositive/llap/groupby8_map.q.out +++ b/ql/src/test/results/clientpositive/groupby8_map.q.out @@ -32,143 +32,102 @@ POSTHOOK: Output: default@dest1_n136 POSTHOOK: Output: default@dest2_n35 STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 + Stage-1 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 - Tez -#### A masked pattern was here #### 
- Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string), substr(value, 5) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Forward + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string), substr(value, 5) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) 
(type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n136 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n136 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: 
hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n35 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 
Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n35 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - 
Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -180,7 +139,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n136 - Stage: Stage-4 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -188,6 +147,38 @@ STAGE PLANS: Column Types: int, string Table: default.dest1_n136 + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2_n35 + Stage: Stage-1 Move Operator tables: @@ -198,13 +189,29 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2_n35 - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest2_n35 + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1_n136 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby8_map_skew.q.out b/ql/src/test/results/clientpositive/groupby8_map_skew.q.out new file mode 100644 index 0000000000..6ae2b31edd --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby8_map_skew.q.out @@ -0,0 +1,957 @@ +PREHOOK: query: CREATE TABLE DEST1_n87(key INT, value 
STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1_n87 +POSTHOOK: query: CREATE TABLE DEST1_n87(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1_n87 +PREHOOK: query: CREATE TABLE DEST2_n22(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2_n22 +POSTHOOK: query: CREATE TABLE DEST2_n22(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2_n22 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1_n87 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2_n22 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n87 +PREHOOK: Output: default@dest2_n22 +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1_n87 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2_n22 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n87 +POSTHOOK: Output: default@dest2_n22 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-8 depends on stages: Stage-1, Stage-5, Stage-9 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: 
COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), substr(value, 5) (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), substr(value, 5) (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n87 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n87 + + Stage: Stage-4 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1_n87 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-8 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2_n22 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: 
KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n22 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n22 + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1_n87 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2_n22 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n87 +PREHOOK: Output: default@dest2_n22 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1_n87 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) 
GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2_n22 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1_n87 +POSTHOOK: Output: default@dest2_n22 +POSTHOOK: Lineage: dest1_n87.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1_n87.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2_n22.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2_n22.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1_n87.* FROM DEST1_n87 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1_n87 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1_n87.* FROM DEST1_n87 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1_n87 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 
+309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2_n22.* FROM DEST2_n22 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2_n22 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2_n22.* FROM DEST2_n22 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2_n22 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 
+223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 diff --git a/ql/src/test/results/clientpositive/llap/groupby8_noskew.q.out b/ql/src/test/results/clientpositive/groupby8_noskew.q.out similarity index 59% rename from ql/src/test/results/clientpositive/llap/groupby8_noskew.q.out rename to ql/src/test/results/clientpositive/groupby8_noskew.q.out index d3a55423c5..fdc5a60438 100644 --- a/ql/src/test/results/clientpositive/llap/groupby8_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby8_noskew.q.out @@ -32,131 +32,90 @@ POSTHOOK: Output: 
default@dest1_n48 POSTHOOK: Output: default@dest2_n9 STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 + Stage-1 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string), substr(value, 5) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Forward + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string), substr(value, 5) (type: string) + null sort order: zz 
+ sort order: ++ + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n48 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - 
table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n9 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n48 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + 
outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2_n9 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -168,7 +127,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n48 - Stage: Stage-4 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -176,6 +135,38 @@ STAGE PLANS: Column Types: int, string Table: default.dest1_n48 + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2_n9 + Stage: Stage-1 Move Operator tables: @@ -186,13 +177,29 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2_n9 - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest2_n9 + Stage: Stage-6 + Map Reduce + Map 
Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: int), value (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1_n48 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/llap/acid_nullscan.q.out b/ql/src/test/results/clientpositive/llap/acid_nullscan.q.out deleted file mode 100644 index 77910929b4..0000000000 --- a/ql/src/test/results/clientpositive/llap/acid_nullscan.q.out +++ /dev/null @@ -1,186 +0,0 @@ -PREHOOK: query: CREATE TABLE acid_vectorized_n1(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@acid_vectorized_n1 -POSTHOOK: query: CREATE TABLE acid_vectorized_n1(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@acid_vectorized_n1 -PREHOOK: query: insert into table acid_vectorized_n1 select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc 
-PREHOOK: Output: default@acid_vectorized_n1 -POSTHOOK: query: insert into table acid_vectorized_n1 select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: default@acid_vectorized_n1 -POSTHOOK: Lineage: acid_vectorized_n1.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -POSTHOOK: Lineage: acid_vectorized_n1.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] -PREHOOK: query: insert into table acid_vectorized_n1 values (1, 'bar') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@acid_vectorized_n1 -POSTHOOK: query: insert into table acid_vectorized_n1 values (1, 'bar') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@acid_vectorized_n1 -POSTHOOK: Lineage: acid_vectorized_n1.a SCRIPT [] -POSTHOOK: Lineage: acid_vectorized_n1.b SCRIPT [] -PREHOOK: query: explain extended -select sum(a) from acid_vectorized_n1 where false -PREHOOK: type: QUERY -PREHOOK: Input: default@acid_vectorized_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain extended -select sum(a) from acid_vectorized_n1 where false -POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid_vectorized_n1 -#### A masked pattern was here #### -OPTIMIZED SQL: SELECT SUM(`a`) AS `$f0` -FROM `default`.`acid_vectorized_n1` -LIMIT 0 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: acid_vectorized_n1 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: - nullscan://null/default.acid_vectorized_n1/part_ [acid_vectorized_n1] - Path -> Partition: - nullscan://null/default.acid_vectorized_n1/part_ - Partition - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"a":"true","b":"true"}} - bucket_count 2 - bucket_field_name a - bucketing_version 2 - column.name.delimiter , - columns a,b - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.acid_vectorized_n1 - numFiles 3 - numRows 11 - rawDataSize 0 - serialization.ddl struct acid_vectorized_n1 { i32 a, string b} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - totalSize 2583 - transactional true - transactional_properties default -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"a":"true","b":"true"}} - bucket_count 2 - bucket_field_name a - bucketing_version 2 - column.name.delimiter , - columns 
a,b - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.acid_vectorized_n1 - numFiles 3 - numRows 11 - rawDataSize 0 - serialization.ddl struct acid_vectorized_n1 { i32 a, string b} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2583 - transactional true - transactional_properties default -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid_vectorized_n1 - name: default.acid_vectorized_n1 - Truncated Path -> Alias: - nullscan://null/default.acid_vectorized_n1/part_ [acid_vectorized_n1] - Reducer 2 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(a) from acid_vectorized_n1 where false -PREHOOK: type: QUERY -PREHOOK: Input: default@acid_vectorized_n1 -#### A masked pattern was here #### -POSTHOOK: query: select sum(a) from 
acid_vectorized_n1 where false -POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid_vectorized_n1 -#### A masked pattern was here #### -NULL diff --git a/ql/src/test/results/clientpositive/llap/alias_casted_column.q.out b/ql/src/test/results/clientpositive/llap/alias_casted_column.q.out deleted file mode 100644 index a6cd5689c2..0000000000 --- a/ql/src/test/results/clientpositive/llap/alias_casted_column.q.out +++ /dev/null @@ -1,46 +0,0 @@ -PREHOOK: query: explain select key from (select cast(key as int) from src )t -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select key from (select cast(key as int) from src )t -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - Select Operator - expressions: UDFToInteger(key) (type: int) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain select key2 from (select cast(key as int) key2 from src )t -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select key2 from (select cast(key as int) key2 from src )t -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - Select Operator - expressions: UDFToInteger(key) (type: int) - outputColumnNames: _col0 - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/allcolref_in_udf.q.out b/ql/src/test/results/clientpositive/llap/allcolref_in_udf.q.out deleted file mode 100644 index a50f9ad0cb..0000000000 --- a/ql/src/test/results/clientpositive/llap/allcolref_in_udf.q.out +++ /dev/null @@ -1,238 +0,0 @@ -PREHOOK: query: explain -select concat(*),array(*) from src 
where key < 100 limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select concat(*),array(*) from src where key < 100 limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) - Filter Operator - predicate: (UDFToDouble(key) < 100.0D) (type: boolean) - Select Operator - expressions: concat(key, value) (type: string), array(key,value) (type: array) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 10 - ListSink - -PREHOOK: query: select concat(*),array(*) from src where key < 100 limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select concat(*),array(*) from src where key < 100 limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -86val_86 ["86","val_86"] -27val_27 ["27","val_27"] -98val_98 ["98","val_98"] -66val_66 ["66","val_66"] -37val_37 ["37","val_37"] -15val_15 ["15","val_15"] -82val_82 ["82","val_82"] -17val_17 ["17","val_17"] -0val_0 ["0","val_0"] -57val_57 ["57","val_57"] -PREHOOK: query: explain -select stack(2, *) as (e1,e2,e3) from ( - select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*) - from src a join src b on a.key+1=b.key where a.key < 100) x limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select stack(2, *) as (e1,e2,e3) from ( - select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*) - from src a join src b on a.key+1=b.key where a.key < 100) x limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a 
root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), concat(key, value) (type: string), (UDFToDouble(key) + 1.0D) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: double) - Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - filterExpr: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), concat(key, value) (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: double) - null 
sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: double) - Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: double) - 1 _col3 (type: double) - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 - Statistics: Num rows: 262 Data size: 189688 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 2 (type: int), concat(_col0, _col1, _col4, _col5) (type: string), _col2 (type: string), _col6 (type: string), concat(_col0, _col1, _col4) (type: string), concat(_col0, _col4, _col5) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 262 Data size: 242088 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 262 Data size: 242088 Basic stats: COMPLETE Column stats: COMPLETE - function name: stack - Select Operator - expressions: col0 (type: string), col1 (type: string), col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 262 Data size: 2096 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select stack(2, *) as (e1,e2,e3) from ( - select concat(*), 
concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*) - from src a join src b on a.key+1=b.key where a.key < 100) x limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select stack(2, *) as (e1,e2,e3) from ( - select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*) - from src a join src b on a.key+1=b.key where a.key < 100) x limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -4val_45val_5 4val_4 5val_5 -4val_45 NULL 5val_5 -4val_45val_5 4val_4 5val_5 -4val_45 NULL 5val_5 -4val_45val_5 4val_4 5val_5 -4val_45 NULL 5val_5 -8val_89val_9 8val_8 9val_9 -8val_89 NULL 9val_9 -9val_910val_10 9val_9 10val_10 -9val_910 NULL 10val_10 -PREHOOK: query: create table allcolref as select array(key, value) from src -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@allcolref -POSTHOOK: query: create table allcolref as select array(key, value) from src -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@allcolref -POSTHOOK: Lineage: allcolref._c0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain select explode(*) as x from allcolref limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@allcolref -#### A masked pattern was here #### -POSTHOOK: query: explain select explode(*) as x from allcolref limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@allcolref -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - TableScan - alias: allcolref - Select Operator - expressions: _c0 (type: array) - outputColumnNames: _col0 - UDTF Operator - function name: 
explode - Select Operator - expressions: col (type: string) - outputColumnNames: _col0 - Limit - Number of rows: 10 - ListSink - -PREHOOK: query: select explode(*) as x from allcolref limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@allcolref -#### A masked pattern was here #### -POSTHOOK: query: select explode(*) as x from allcolref limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@allcolref -#### A masked pattern was here #### -238 -val_238 -86 -val_86 -311 -val_311 -27 -val_27 -165 -val_165 diff --git a/ql/src/test/results/clientpositive/llap/ambiguous_col.q.out b/ql/src/test/results/clientpositive/llap/ambiguous_col.q.out deleted file mode 100644 index 1ff7d793a3..0000000000 --- a/ql/src/test/results/clientpositive/llap/ambiguous_col.q.out +++ /dev/null @@ -1,448 +0,0 @@ -PREHOOK: query: explain select * from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: 
string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6942 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 39 Data size: 6942 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 39 Data size: 6942 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select * from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 128 val_128 -128 128 val_128 -128 128 val_128 -146 146 val_146 -146 146 val_146 -150 150 val_150 -213 213 val_213 -213 213 val_213 -224 224 val_224 -224 224 val_224 -238 238 val_238 -238 238 val_238 -255 255 val_255 -255 255 val_255 -273 273 val_273 -273 273 val_273 -273 273 val_273 -278 278 val_278 -278 278 val_278 -311 311 val_311 -311 311 val_311 -311 311 val_311 -369 369 val_369 -369 369 val_369 -369 369 val_369 -401 401 val_401 -401 401 val_401 -401 401 val_401 -401 401 val_401 -401 401 val_401 -406 406 val_406 -406 406 val_406 -406 406 val_406 -406 406 val_406 -66 66 val_66 -98 98 val_98 -98 98 val_98 -PREHOOK: query: explain select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: 
- Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - 
Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 128 -128 128 -128 128 -146 146 -146 146 -150 150 -213 213 -213 213 -224 224 -224 224 -238 238 -238 238 -255 255 -255 255 -273 273 -273 273 -273 273 -278 278 -278 278 -311 311 -311 311 -311 311 -369 369 -369 369 -369 369 -401 401 -401 401 -401 401 -401 401 -401 401 -406 406 -406 406 -406 406 -406 406 -66 66 -98 98 -98 98 -PREHOOK: query: explain select * from (select a.key, a.key from (select * from src) a join (select * from src1) b on (a.key = b.key)) t -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from (select a.key, a.key from (select * from src) a join (select * from src1) b on (a.key = b.key)) t -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: 
default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator 
Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6786 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 39 Data size: 6786 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from (select a.key, a.key from (select * from src) a join (select * from src1) b on (a.key = b.key)) t -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select * from (select a.key, a.key from (select * from src) a join (select * from src1) b on (a.key = b.key)) t -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 128 -128 128 -128 128 -146 146 -146 146 -150 150 -213 213 -213 213 -224 224 -224 224 -238 238 -238 238 -255 255 -255 255 -273 273 -273 273 -273 273 -278 278 -278 278 -311 311 -311 311 -311 311 -369 369 -369 369 -369 369 -401 401 -401 401 -401 401 -401 401 -401 401 -406 406 -406 406 -406 406 -406 406 -66 66 -98 98 -98 98 -PREHOOK: query: explain select count(*) from (select key, key from src) subq -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select count(*) from (select key, key from src) subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked 
pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from (select key, key from src) subq -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from (select key, key from src) subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -500 diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_deep_filters.q.out b/ql/src/test/results/clientpositive/llap/annotate_stats_deep_filters.q.out deleted file mode 100644 index 8e5ccfebdd..0000000000 --- a/ql/src/test/results/clientpositive/llap/annotate_stats_deep_filters.q.out +++ /dev/null @@ -1,280 +0,0 @@ -PREHOOK: query: create table over1k_n4( -t tinyint, -si smallint, -i int, -b bigint, -f float, -d double, -bo boolean, -s string, -ts timestamp, -`dec` decimal(4,2), -bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1k_n4 -POSTHOOK: query: create table over1k_n4( -t tinyint, -si smallint, -i int, -b bigint, -f float, -d double, -bo boolean, -s string, -ts timestamp, -`dec` decimal(4,2), -bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1k_n4 -PREHOOK: query: load data local inpath '../../data/files/over1k' overwrite into table over1k_n4 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over1k_n4 -POSTHOOK: query: load data local inpath '../../data/files/over1k' overwrite into table over1k_n4 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over1k_n4 -PREHOOK: query: load data local inpath '../../data/files/over1k' into table over1k_n4 -PREHOOK: type: 
LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over1k_n4 -POSTHOOK: query: load data local inpath '../../data/files/over1k' into table over1k_n4 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over1k_n4 -PREHOOK: query: analyze table over1k_n4 compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k_n4 -PREHOOK: Output: default@over1k_n4 -POSTHOOK: query: analyze table over1k_n4 compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k_n4 -POSTHOOK: Output: default@over1k_n4 -PREHOOK: query: analyze table over1k_n4 compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@over1k_n4 -PREHOOK: Output: default@over1k_n4 -#### A masked pattern was here #### -POSTHOOK: query: analyze table over1k_n4 compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@over1k_n4 -POSTHOOK: Output: default@over1k_n4 -#### A masked pattern was here #### -PREHOOK: query: explain select count(*) from over1k_n4 where ( -(t=1 and si=2) -or (t=2 and si=3) -or (t=3 and si=4) -or (t=4 and si=5) -or (t=5 and si=6) -or (t=6 and si=7) -or (t=7 and si=8) -or (t=9 and si=10) -or (t=10 and si=11) -or (t=11 and si=12) -or (t=12 and si=13) -or (t=13 and si=14) -or (t=14 and si=15) -or (t=15 and si=16) -or (t=16 and si=17) -or (t=17 and si=18) -or (t=27 and si=28) -or (t=37 and si=38) -or (t=47 and si=48) -or (t=52 and si=53)) -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k_n4 -#### A masked pattern was here #### -POSTHOOK: query: explain select count(*) from over1k_n4 where ( -(t=1 and si=2) -or (t=2 and si=3) -or (t=3 and si=4) -or (t=4 and si=5) -or (t=5 and si=6) -or (t=6 and si=7) -or (t=7 and si=8) -or (t=9 and si=10) -or (t=10 and si=11) -or (t=11 and si=12) -or (t=12 and si=13) -or (t=13 and si=14) -or (t=14 and si=15) -or (t=15 and si=16) -or (t=16 and si=17) -or (t=17 and si=18) -or (t=27 and si=28) -or (t=37 and si=38) -or (t=47 and 
si=48) -or (t=52 and si=53)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k_n4 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over1k_n4 - filterExpr: (((t = 1Y) and (si = 2S)) or ((t = 2Y) and (si = 3S)) or ((t = 3Y) and (si = 4S)) or ((t = 4Y) and (si = 5S)) or ((t = 5Y) and (si = 6S)) or ((t = 6Y) and (si = 7S)) or ((t = 7Y) and (si = 8S)) or ((t = 9Y) and (si = 10S)) or ((t = 10Y) and (si = 11S)) or ((t = 11Y) and (si = 12S)) or ((t = 12Y) and (si = 13S)) or ((t = 13Y) and (si = 14S)) or ((t = 14Y) and (si = 15S)) or ((t = 15Y) and (si = 16S)) or ((t = 16Y) and (si = 17S)) or ((t = 17Y) and (si = 18S)) or ((t = 27Y) and (si = 28S)) or ((t = 37Y) and (si = 38S)) or ((t = 47Y) and (si = 48S)) or ((t = 52Y) and (si = 53S))) (type: boolean) - Statistics: Num rows: 2098 Data size: 16744 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((t = 1Y) and (si = 2S)) or ((t = 2Y) and (si = 3S)) or ((t = 3Y) and (si = 4S)) or ((t = 4Y) and (si = 5S)) or ((t = 5Y) and (si = 6S)) or ((t = 6Y) and (si = 7S)) or ((t = 7Y) and (si = 8S)) or ((t = 9Y) and (si = 10S)) or ((t = 10Y) and (si = 11S)) or ((t = 11Y) and (si = 12S)) or ((t = 12Y) and (si = 13S)) or ((t = 13Y) and (si = 14S)) or ((t = 14Y) and (si = 15S)) or ((t = 15Y) and (si = 16S)) or ((t = 16Y) and (si = 17S)) or ((t = 17Y) and (si = 18S)) or ((t = 27Y) and (si = 28S)) or ((t = 37Y) and (si = 38S)) or ((t = 47Y) and (si = 48S)) or ((t = 52Y) and (si = 53S))) (type: boolean) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - 
aggregations: count() - minReductionHashAggr: 0.95 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select count(*) from over1k_n4 where ( -(t=1 and si=2) -or (t=2 and si=3) -or (t=3 and si=4) -or (t=4 and si=5) -or (t=5 and si=6) -or (t=6 and si=7) -or (t=7 and si=8) -or (t=9 and si=10) -or (t=10 and si=11) -or (t=11 and si=12) -or (t=12 and si=13) -or (t=13 and si=14) -or (t=14 and si=15) -or (t=15 and si=16) -or (t=16 and si=17) -or (t=17 and si=18) -or (t=27 and si=28) -or (t=37 and si=38) -or (t=47 and si=48) -or (t=52 and si=53)) -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k_n4 -#### A masked pattern was here #### -POSTHOOK: query: explain select count(*) from over1k_n4 where ( -(t=1 and si=2) -or (t=2 and si=3) -or (t=3 and si=4) -or (t=4 and si=5) -or (t=5 and si=6) -or (t=6 and si=7) -or (t=7 and si=8) -or (t=9 and si=10) -or (t=10 and si=11) -or (t=11 and si=12) -or (t=12 and si=13) -or (t=13 and si=14) -or (t=14 and si=15) -or (t=15 and si=16) -or (t=16 
and si=17) -or (t=17 and si=18) -or (t=27 and si=28) -or (t=37 and si=38) -or (t=47 and si=48) -or (t=52 and si=53)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k_n4 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over1k_n4 - filterExpr: (((t = 1Y) and (si = 2S)) or ((t = 2Y) and (si = 3S)) or ((t = 3Y) and (si = 4S)) or ((t = 4Y) and (si = 5S)) or ((t = 5Y) and (si = 6S)) or ((t = 6Y) and (si = 7S)) or ((t = 7Y) and (si = 8S)) or ((t = 9Y) and (si = 10S)) or ((t = 10Y) and (si = 11S)) or ((t = 11Y) and (si = 12S)) or ((t = 12Y) and (si = 13S)) or ((t = 13Y) and (si = 14S)) or ((t = 14Y) and (si = 15S)) or ((t = 15Y) and (si = 16S)) or ((t = 16Y) and (si = 17S)) or ((t = 17Y) and (si = 18S)) or ((t = 27Y) and (si = 28S)) or ((t = 37Y) and (si = 38S)) or ((t = 47Y) and (si = 48S)) or ((t = 52Y) and (si = 53S))) (type: boolean) - Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((t = 1Y) and (si = 2S)) or ((t = 2Y) and (si = 3S)) or ((t = 3Y) and (si = 4S)) or ((t = 4Y) and (si = 5S)) or ((t = 5Y) and (si = 6S)) or ((t = 6Y) and (si = 7S)) or ((t = 7Y) and (si = 8S)) or ((t = 9Y) and (si = 10S)) or ((t = 10Y) and (si = 11S)) or ((t = 11Y) and (si = 12S)) or ((t = 12Y) and (si = 13S)) or ((t = 13Y) and (si = 14S)) or ((t = 14Y) and (si = 15S)) or ((t = 15Y) and (si = 16S)) or ((t = 16Y) and (si = 17S)) or ((t = 17Y) and (si = 18S)) or ((t = 27Y) and (si = 28S)) or ((t = 37Y) and (si = 38S)) or ((t = 47Y) and (si = 48S)) or ((t = 52Y) and (si = 53S))) (type: boolean) - Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 
2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/llap/annotate_stats_filter.q.out deleted file mode 100644 index fc0cad26a6..0000000000 --- a/ql/src/test/results/clientpositive/llap/annotate_stats_filter.q.out +++ /dev/null @@ -1,783 +0,0 @@ -PREHOOK: query: create table if not exists loc_staging ( - state string, - locid int, - zip bigint, - year int -) row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@loc_staging -POSTHOOK: query: create table if not exists loc_staging ( - state string, - locid int, - zip bigint, - year int -) row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default 
-POSTHOOK: Output: default@loc_staging -PREHOOK: query: create table loc_orc like loc_staging -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@loc_orc -POSTHOOK: query: create table loc_orc like loc_staging -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@loc_orc -PREHOOK: query: alter table loc_orc set fileformat orc -PREHOOK: type: ALTERTABLE_FILEFORMAT -PREHOOK: Input: default@loc_orc -PREHOOK: Output: default@loc_orc -POSTHOOK: query: alter table loc_orc set fileformat orc -POSTHOOK: type: ALTERTABLE_FILEFORMAT -POSTHOOK: Input: default@loc_orc -POSTHOOK: Output: default@loc_orc -PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@loc_staging -POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@loc_staging -PREHOOK: query: insert overwrite table loc_orc select * from loc_staging -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_staging -PREHOOK: Output: default@loc_orc -POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_staging -POSTHOOK: Output: default@loc_orc -POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] -POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] -POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] -POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] -PREHOOK: query: explain select * from loc_orc -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked 
pattern was here #### -POSTHOOK: query: explain select * from loc_orc -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where state='OH' -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where state='OH' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (state = 'OH') (type: boolean) - Filter Operator - predicate: (state = 'OH') (type: boolean) - Select Operator - expressions: 'OH' (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@loc_orc -PREHOOK: Output: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@loc_orc -POSTHOOK: Output: default@loc_orc -#### A masked pattern was here #### -PREHOOK: query: explain select * from loc_orc where state='OH' -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where state='OH' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was 
here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (state = 'OH') (type: boolean) - Filter Operator - predicate: (state = 'OH') (type: boolean) - Select Operator - expressions: 'OH' (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where state!='OH' -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where state!='OH' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (state <> 'OH') (type: boolean) - Filter Operator - predicate: (state <> 'OH') (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where state<>'OH' -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where state<>'OH' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (state <> 'OH') (type: boolean) - Filter Operator - predicate: (state <> 'OH') (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc 
where zip is null -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where zip is null -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: zip is null (type: boolean) - Filter Operator - predicate: zip is null (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), null (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where !(zip is not null) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where !(zip is not null) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: zip is null (type: boolean) - Filter Operator - predicate: zip is null (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), null (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where zip is not null -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where zip is not null -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: zip is not null (type: boolean) - Filter 
Operator - predicate: zip is not null (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where !(zip is null) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where !(zip is null) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: zip is not null (type: boolean) - Filter Operator - predicate: zip is not null (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where !false -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where !false -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where !true -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where !true -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: 
Stage-0 - Fetch Operator - limit: 0 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from loc_orc where true -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where true -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where 'foo' -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where 'foo' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - Filter Operator - predicate: 'foo' (type: string) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where true = true -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where true = true -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, 
_col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where false = true -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where false = true -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 0 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from loc_orc where 'foo' = 'bar' -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where 'foo' = 'bar' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 0 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from loc_orc where false -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where false -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 0 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from loc_orc where state='OH' or state='CA' -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where state='OH' or state='CA' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (state) IN ('OH', 'CA') (type: boolean) - Filter Operator - predicate: (state) IN ('OH', 
'CA') (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where year=2001 and year is null -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where year=2001 and year is null -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 0 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from loc_orc where year=2001 and state='OH' and state='FL' -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where year=2001 and state='OH' and state='FL' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 0 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from loc_orc where (year=2001 and year is null) or (state='CA') -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where (year=2001 and year is null) or (state='CA') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (state = 'CA') (type: boolean) - Filter Operator - predicate: (state = 'CA') (type: boolean) - Select Operator - expressions: 'CA' (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - 
ListSink - -PREHOOK: query: explain select * from loc_orc where (year=2001 or year is null) and (state='CA') -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where (year=2001 or year is null) and (state='CA') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (((year = 2001) or year is null) and (state = 'CA')) (type: boolean) - Filter Operator - predicate: (((year = 2001) or year is null) and (state = 'CA')) (type: boolean) - Select Operator - expressions: 'CA' (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where locid < 30 -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where locid < 30 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (locid < 30) (type: boolean) - Filter Operator - predicate: (locid < 30) (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where locid > 30 -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where locid > 30 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is 
a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (locid > 30) (type: boolean) - Filter Operator - predicate: (locid > 30) (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where locid <= 30 -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where locid <= 30 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (locid <= 30) (type: boolean) - Filter Operator - predicate: (locid <= 30) (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where locid >= 30 -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where locid >= 30 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (locid >= 30) (type: boolean) - Filter Operator - predicate: (locid >= 30) (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where locid < 3 -PREHOOK: type: QUERY -PREHOOK: Input: 
default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where locid < 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (locid < 3) (type: boolean) - Filter Operator - predicate: (locid < 3) (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where locid > 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where locid > 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (locid > 3) (type: boolean) - Filter Operator - predicate: (locid > 3) (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where locid <= 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where locid <= 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (locid <= 3) (type: boolean) - Filter Operator - predicate: (locid <= 3) (type: boolean) - Select Operator - expressions: state 
(type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: explain select * from loc_orc where locid >= 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc where locid >= 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc - filterExpr: (locid >= 3) (type: boolean) - Filter Operator - predicate: (locid >= 3) (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/llap/annotate_stats_groupby.q.out deleted file mode 100644 index 73e43bee85..0000000000 --- a/ql/src/test/results/clientpositive/llap/annotate_stats_groupby.q.out +++ /dev/null @@ -1,1808 +0,0 @@ -PREHOOK: query: create table if not exists loc_staging_n2 ( - state string, - locid int, - zip bigint, - year int -) row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@loc_staging_n2 -POSTHOOK: query: create table if not exists loc_staging_n2 ( - state string, - locid int, - zip bigint, - year int -) row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@loc_staging_n2 -PREHOOK: query: create table loc_orc_n2 like loc_staging_n2 -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@loc_orc_n2 -POSTHOOK: query: create table loc_orc_n2 like loc_staging_n2 
-POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@loc_orc_n2 -PREHOOK: query: alter table loc_orc_n2 set fileformat orc -PREHOOK: type: ALTERTABLE_FILEFORMAT -PREHOOK: Input: default@loc_orc_n2 -PREHOOK: Output: default@loc_orc_n2 -POSTHOOK: query: alter table loc_orc_n2 set fileformat orc -POSTHOOK: type: ALTERTABLE_FILEFORMAT -POSTHOOK: Input: default@loc_orc_n2 -POSTHOOK: Output: default@loc_orc_n2 -PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging_n2 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@loc_staging_n2 -POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging_n2 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@loc_staging_n2 -PREHOOK: query: insert overwrite table loc_orc_n2 select * from loc_staging_n2 -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_staging_n2 -PREHOOK: Output: default@loc_orc_n2 -POSTHOOK: query: insert overwrite table loc_orc_n2 select * from loc_staging_n2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_staging_n2 -POSTHOOK: Output: default@loc_orc_n2 -POSTHOOK: Lineage: loc_orc_n2.locid SIMPLE [(loc_staging_n2)loc_staging_n2.FieldSchema(name:locid, type:int, comment:null), ] -POSTHOOK: Lineage: loc_orc_n2.state SIMPLE [(loc_staging_n2)loc_staging_n2.FieldSchema(name:state, type:string, comment:null), ] -POSTHOOK: Lineage: loc_orc_n2.year SIMPLE [(loc_staging_n2)loc_staging_n2.FieldSchema(name:year, type:int, comment:null), ] -POSTHOOK: Lineage: loc_orc_n2.zip SIMPLE [(loc_staging_n2)loc_staging_n2.FieldSchema(name:zip, type:bigint, comment:null), ] -PREHOOK: query: explain select * from loc_orc_n2 -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc_n2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked 
pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc_n2 - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: analyze table loc_orc_n2 compute statistics for columns state -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@loc_orc_n2 -PREHOOK: Output: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: analyze table loc_orc_n2 compute statistics for columns state -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@loc_orc_n2 -POSTHOOK: Output: default@loc_orc_n2 -#### A masked pattern was here #### -PREHOOK: query: explain select a, c, min(b) -from ( select state as a, locid as b, count(*) as c - from loc_orc_n2 - group by state,locid - ) sq1 -group by a,c -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select a, c, min(b) -from ( select state as a, locid as b, count(*) as c - from loc_orc_n2 - group by state,locid - ) sq1 -group by a,c -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: state 
(type: string), locid (type: int) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string), _col2 (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: analyze table loc_orc_n2 compute statistics for columns state,locid,year -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@loc_orc_n2 -PREHOOK: Output: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: analyze table loc_orc_n2 compute statistics for columns state,locid,year -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@loc_orc_n2 -POSTHOOK: Output: default@loc_orc_n2 -#### A masked pattern was here #### -PREHOOK: query: explain select year from loc_orc_n2 group by year -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select year from loc_orc_n2 group by year -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: year (type: int) - outputColumnNames: year - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: year (type: int) - minReductionHashAggr: 0.75 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8 Data size: 
32 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 
(type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state 
(type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with rollup -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with rollup -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - 
Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by rollup( state,locid ) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from 
loc_orc_n2 group by rollup( state,locid ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state)) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid)) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), 
_col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid),()) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid),()) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state 
(type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2352 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state,locid),(state),(locid),()) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state,locid),(state),(locid),()) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - 
Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 32 Data size: 3136 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select year from loc_orc_n2 group by year -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: 
query: explain select year from loc_orc_n2 group by year -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: year (type: int) - outputColumnNames: year - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: year (type: int) - minReductionHashAggr: 0.875 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube -PREHOOK: type: QUERY -PREHOOK: 
Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,zip from loc_orc_n2 group by state,zip -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,zip from loc_orc_n2 group by state,zip -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), zip (type: bigint) - outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), zip (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 32 Data size: 3184 Basic stats: 
COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with rollup -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with rollup -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 
Data size: 2388 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by rollup (state,locid) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by rollup (state,locid) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 
state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state)) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - 
Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid 
grouping sets((state),(locid)) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE 
Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid),()) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state),(locid),()) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - 
Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state,locid),(state),(locid),()) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid grouping sets((state,locid),(state),(locid),()) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE - 
Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select year from loc_orc_n2 group by year -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select year from loc_orc_n2 group by year -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: year (type: int) - outputColumnNames: year - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE 
Column stats: NONE - Group By Operator - keys: year (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n2 group by state,locid with cube -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n2 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid 
(type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), 0L (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: create table t1_uq12(i int, j int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t1_uq12 -POSTHOOK: query: create table t1_uq12(i int, j int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t1_uq12 -PREHOOK: query: alter table t1_uq12 update statistics set('numRows'='10000', 'rawDataSize'='18000') -PREHOOK: type: ALTERTABLE_UPDATETABLESTATS -PREHOOK: Input: default@t1_uq12 
-PREHOOK: Output: default@t1_uq12 -POSTHOOK: query: alter table t1_uq12 update statistics set('numRows'='10000', 'rawDataSize'='18000') -POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS -POSTHOOK: Input: default@t1_uq12 -POSTHOOK: Output: default@t1_uq12 -PREHOOK: query: alter table t1_uq12 update statistics for column i set('numDVs'='2500','numNulls'='50','highValue'='1000','lowValue'='0') -PREHOOK: type: ALTERTABLE_UPDATETABLESTATS -PREHOOK: Input: default@t1_uq12 -PREHOOK: Output: default@t1_uq12 -POSTHOOK: query: alter table t1_uq12 update statistics for column i set('numDVs'='2500','numNulls'='50','highValue'='1000','lowValue'='0') -POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS -POSTHOOK: Input: default@t1_uq12 -POSTHOOK: Output: default@t1_uq12 -PREHOOK: query: alter table t1_uq12 update statistics for column j set('numDVs'='500','numNulls'='30','highValue'='100','lowValue'='50') -PREHOOK: type: ALTERTABLE_UPDATETABLESTATS -PREHOOK: Input: default@t1_uq12 -PREHOOK: Output: default@t1_uq12 -POSTHOOK: query: alter table t1_uq12 update statistics for column j set('numDVs'='500','numNulls'='30','highValue'='100','lowValue'='50') -POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS -POSTHOOK: Input: default@t1_uq12 -POSTHOOK: Output: default@t1_uq12 -PREHOOK: query: create table t2_uq12(i2 int, j2 int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t2_uq12 -POSTHOOK: query: create table t2_uq12(i2 int, j2 int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t2_uq12 -PREHOOK: query: alter table t2_uq12 update statistics set('numRows'='100000000', 'rawDataSize'='10000') -PREHOOK: type: ALTERTABLE_UPDATETABLESTATS -PREHOOK: Input: default@t2_uq12 -PREHOOK: Output: default@t2_uq12 -POSTHOOK: query: alter table t2_uq12 update statistics set('numRows'='100000000', 'rawDataSize'='10000') -POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS -POSTHOOK: Input: default@t2_uq12 -POSTHOOK: Output: default@t2_uq12 
-PREHOOK: query: alter table t2_uq12 update statistics for column i2 set('numDVs'='10000000','numNulls'='0','highValue'='8000','lowValue'='0') -PREHOOK: type: ALTERTABLE_UPDATETABLESTATS -PREHOOK: Input: default@t2_uq12 -PREHOOK: Output: default@t2_uq12 -POSTHOOK: query: alter table t2_uq12 update statistics for column i2 set('numDVs'='10000000','numNulls'='0','highValue'='8000','lowValue'='0') -POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS -POSTHOOK: Input: default@t2_uq12 -POSTHOOK: Output: default@t2_uq12 -PREHOOK: query: alter table t2_uq12 update statistics for column j2 set('numDVs'='10','numNulls'='0','highValue'='800','lowValue'='-1') -PREHOOK: type: ALTERTABLE_UPDATETABLESTATS -PREHOOK: Input: default@t2_uq12 -PREHOOK: Output: default@t2_uq12 -POSTHOOK: query: alter table t2_uq12 update statistics for column j2 set('numDVs'='10','numNulls'='0','highValue'='800','lowValue'='-1') -POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS -POSTHOOK: Input: default@t2_uq12 -POSTHOOK: Output: default@t2_uq12 -PREHOOK: query: explain select count (1) from t1_uq12,t2_uq12 where t1_uq12.j=t2_uq12.i2 group by t1_uq12.i, t1_uq12.j -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_uq12 -PREHOOK: Input: default@t2_uq12 -#### A masked pattern was here #### -POSTHOOK: query: explain select count (1) from t1_uq12,t2_uq12 where t1_uq12.j=t2_uq12.i2 group by t1_uq12.i, t1_uq12.j -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_uq12 -POSTHOOK: Input: default@t2_uq12 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 5 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1_uq12 - filterExpr: j is not null (type: 
boolean) - Statistics: Num rows: 10000 Data size: 79688 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: j is not null (type: boolean) - Statistics: Num rows: 9970 Data size: 79448 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9970 Data size: 79448 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 9970 Data size: 79448 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 9970 Data size: 79528 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: t2_uq12 - filterExpr: (i2 is not null and i2 BETWEEN DynamicValue(RS_6_t1_uq12_j_min) AND DynamicValue(RS_6_t1_uq12_j_max) and in_bloom_filter(i2, DynamicValue(RS_6_t1_uq12_j_bloom_filter))) (type: boolean) - Statistics: Num rows: 100000000 Data size: 400000000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (i2 is not null and i2 BETWEEN DynamicValue(RS_6_t1_uq12_j_min) AND DynamicValue(RS_6_t1_uq12_j_max) and in_bloom_filter(i2, DynamicValue(RS_6_t1_uq12_j_bloom_filter))) 
(type: boolean) - Statistics: Num rows: 100000000 Data size: 400000000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i2 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100000000 Data size: 400000000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100000000 Data size: 400000000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 99700 Data size: 797288 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: int), _col1 (type: int) - minReductionHashAggr: 0.43852556 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9970 Data size: 159496 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 9970 Data size: 159496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9970 Data size: 159496 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 9970 Data size: 79760 Basic stats: COMPLETE Column stats: COMPLETE - File 
Output Operator - compressed: false - Statistics: Num rows: 9970 Data size: 79760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: drop table t1_uq12 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@t1_uq12 -PREHOOK: Output: default@t1_uq12 -POSTHOOK: query: drop table t1_uq12 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@t1_uq12 -POSTHOOK: Output: default@t1_uq12 -PREHOOK: query: drop table t2_uq12 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@t2_uq12 -PREHOOK: Output: default@t2_uq12 -POSTHOOK: query: drop table t2_uq12 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@t2_uq12 -POSTHOOK: Output: default@t2_uq12 diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_groupby2.q.out b/ql/src/test/results/clientpositive/llap/annotate_stats_groupby2.q.out deleted file mode 100644 index 3f009fee8e..0000000000 --- a/ql/src/test/results/clientpositive/llap/annotate_stats_groupby2.q.out +++ /dev/null @@ -1,504 +0,0 @@ -PREHOOK: query: drop table location -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table location -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table location (state string, 
country string, votes bigint) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@location -POSTHOOK: query: create table location (state string, country string, votes bigint) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@location -PREHOOK: query: load data local inpath "../../data/files/location.txt" overwrite into table location -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@location -POSTHOOK: query: load data local inpath "../../data/files/location.txt" overwrite into table location -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@location -PREHOOK: query: analyze table location compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@location -PREHOOK: Output: default@location -POSTHOOK: query: analyze table location compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@location -POSTHOOK: Output: default@location -PREHOOK: query: analyze table location compute statistics for columns state, country -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@location -PREHOOK: Output: default@location -#### A masked pattern was here #### -POSTHOOK: query: analyze table location compute statistics for columns state, country -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@location -POSTHOOK: Output: default@location -#### A masked pattern was here #### -PREHOOK: query: explain select state, country from location group by state, country -PREHOOK: type: QUERY -PREHOOK: Input: default@location -#### A masked pattern was here #### -POSTHOOK: query: explain select state, country from location group by state, country -POSTHOOK: type: QUERY -POSTHOOK: Input: default@location -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 
<- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: location - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), country (type: string) - outputColumnNames: state, country - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), country (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state, country from location group by state, country with cube -PREHOOK: type: QUERY -PREHOOK: Input: default@location -#### A masked pattern was here #### -POSTHOOK: query: explain select state, country from location group by state, country with cube -POSTHOOK: type: QUERY 
-POSTHOOK: Input: default@location -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: location - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), country (type: string) - outputColumnNames: state, country - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), country (type: string), 0L (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE Column stats: NONE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: 
Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state, country from location group by state, country -PREHOOK: type: QUERY -PREHOOK: Input: default@location -#### A masked pattern was here #### -POSTHOOK: query: explain select state, country from location group by state, country -POSTHOOK: type: QUERY -POSTHOOK: Input: default@location -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: location - Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), country (type: string) - outputColumnNames: state, country - Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), country (type: string) - minReductionHashAggr: 0.9 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1730 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 10 Data size: 1730 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 346 Basic stats: 
COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state, votes from location group by state, votes -PREHOOK: type: QUERY -PREHOOK: Input: default@location -#### A masked pattern was here #### -POSTHOOK: query: explain select state, votes from location group by state, votes -POSTHOOK: type: QUERY -POSTHOOK: Input: default@location -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: location - Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: state (type: string), votes (type: bigint) - outputColumnNames: state, votes - Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - keys: state (type: string), votes (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - 
mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state, country from location group by state, country with cube -PREHOOK: type: QUERY -PREHOOK: Input: default@location -#### A masked pattern was here #### -POSTHOOK: query: explain select state, country from location group by state, country with cube -POSTHOOK: type: QUERY -POSTHOOK: Input: default@location -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: location - Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), country (type: string) - outputColumnNames: state, country - Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), country (type: string), 0L (type: bigint) - minReductionHashAggr: 0.9 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 40 Data size: 7240 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1448 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 1448 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state, country from location group by state, country -PREHOOK: type: QUERY -PREHOOK: Input: default@location -#### A masked pattern was here #### -POSTHOOK: query: explain select state, country from location group by state, country -POSTHOOK: type: QUERY -POSTHOOK: Input: default@location -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: location - Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), country (type: string) - outputColumnNames: state, country - Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), country (type: string) - 
minReductionHashAggr: 0.9 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state, country from location group by state, country with cube -PREHOOK: type: QUERY -PREHOOK: Input: default@location -#### A masked pattern was here #### -POSTHOOK: query: explain select state, country from location group by state, country with cube -POSTHOOK: type: QUERY -POSTHOOK: Input: default@location -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: location - Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE - 
Select Operator - expressions: state (type: string), country (type: string) - outputColumnNames: state, country - Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), country (type: string), 0L (type: bigint) - minReductionHashAggr: 0.9 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 80 Data size: 14480 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 80 Data size: 14480 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1448 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 1448 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: drop table location -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@location -PREHOOK: Output: default@location -POSTHOOK: query: drop table location -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@location -POSTHOOK: Output: default@location diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_join.q.out 
b/ql/src/test/results/clientpositive/llap/annotate_stats_join.q.out deleted file mode 100644 index 750c7465fd..0000000000 --- a/ql/src/test/results/clientpositive/llap/annotate_stats_join.q.out +++ /dev/null @@ -1,1221 +0,0 @@ -PREHOOK: query: create table if not exists emp_n2 ( - lastname string, - deptid int, - locid int -) row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@emp_n2 -POSTHOOK: query: create table if not exists emp_n2 ( - lastname string, - deptid int, - locid int -) row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@emp_n2 -PREHOOK: query: create table if not exists dept_n1 ( - deptid int, - deptname string -) row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dept_n1 -POSTHOOK: query: create table if not exists dept_n1 ( - deptid int, - deptname string -) row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dept_n1 -PREHOOK: query: create table if not exists loc ( - state string, - locid int, - zip bigint, - year int -) row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@loc -POSTHOOK: query: create table if not exists loc ( - state string, - locid int, - zip bigint, - year int -) row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@loc -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_n2 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@emp_n2 -POSTHOOK: query: 
LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_n2 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@emp_n2 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_n1 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@dept_n1 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_n1 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@dept_n1 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@loc -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@loc -PREHOOK: query: analyze table emp_n2 compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@emp_n2 -PREHOOK: Output: default@emp_n2 -POSTHOOK: query: analyze table emp_n2 compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@emp_n2 -POSTHOOK: Output: default@emp_n2 -PREHOOK: query: analyze table dept_n1 compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@dept_n1 -PREHOOK: Output: default@dept_n1 -POSTHOOK: query: analyze table dept_n1 compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Output: default@dept_n1 -PREHOOK: query: analyze table loc compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@loc -PREHOOK: Output: default@loc -POSTHOOK: query: analyze table loc compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc -POSTHOOK: Output: default@loc -PREHOOK: query: analyze table emp_n2 compute statistics for columns lastname,deptid,locid -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@emp_n2 -PREHOOK: Output: default@emp_n2 -#### A masked pattern was here 
#### -POSTHOOK: query: analyze table emp_n2 compute statistics for columns lastname,deptid,locid -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@emp_n2 -POSTHOOK: Output: default@emp_n2 -#### A masked pattern was here #### -PREHOOK: query: analyze table dept_n1 compute statistics for columns deptname,deptid -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@dept_n1 -PREHOOK: Output: default@dept_n1 -#### A masked pattern was here #### -POSTHOOK: query: analyze table dept_n1 compute statistics for columns deptname,deptid -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Output: default@dept_n1 -#### A masked pattern was here #### -PREHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@loc -PREHOOK: Output: default@loc -#### A masked pattern was here #### -POSTHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@loc -POSTHOOK: Output: default@loc -#### A masked pattern was here #### -PREHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid) -PREHOOK: type: QUERY -PREHOOK: Input: default@dept_n1 -PREHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: e - filterExpr: deptid is not null (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - 
Filter Operator - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int), locid (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: d - filterExpr: deptid is not null (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 9312 
Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from emp_n2,dept_n1 where emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname -PREHOOK: type: QUERY -PREHOOK: Input: default@dept_n1 -PREHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from emp_n2,dept_n1 where emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: emp_n2 - filterExpr: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int), locid (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: 
COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: dept_n1 - filterExpr: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid and e.lastname = d.deptname) -PREHOOK: type: QUERY -PREHOOK: Input: 
default@dept_n1 -PREHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid and e.lastname = d.deptname) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: e - filterExpr: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int), locid (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: d - filterExpr: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - 
Select Operator - expressions: deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from emp_n2,dept_n1 where emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname -PREHOOK: type: QUERY -PREHOOK: Input: default@dept_n1 -PREHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from emp_n2,dept_n1 where emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - 
Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: emp_n2 - filterExpr: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int), locid (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: dept_n1 - filterExpr: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data 
size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid) join emp_n2 e1 on (e.deptid = e1.deptid) -PREHOOK: type: QUERY -PREHOOK: Input: default@dept_n1 -PREHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid) join emp_n2 e1 on (e.deptid = e1.deptid) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: e - filterExpr: deptid is not null (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: deptid is not null (type: boolean) - Statistics: 
Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int), locid (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col2 (type: int) - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: d - filterExpr: deptid is not null (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - 
outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid) -PREHOOK: type: QUERY -PREHOOK: Input: default@dept_n1 -PREHOOK: Input: default@emp_n2 -PREHOOK: Input: default@loc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Input: default@emp_n2 -POSTHOOK: Input: default@loc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 
(SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: e - filterExpr: deptid is not null (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int), locid (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: d - filterExpr: deptid is not null (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: l - 
filterExpr: locid is not null (type: boolean) - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: locid is not null (type: boolean) - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col2 (type: bigint), _col3 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 64 Data size: 18944 Basic stats: 
COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state) -PREHOOK: type: QUERY -PREHOOK: Input: default@dept_n1 -PREHOOK: Input: default@emp_n2 -PREHOOK: Input: default@loc -#### A masked pattern was here #### -POSTHOOK: query: explain select * from emp_n2 e join dept_n1 d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Input: default@emp_n2 -POSTHOOK: Input: default@loc -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: e - filterExpr: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int), locid (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: 
string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: d - filterExpr: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: l - filterExpr: (locid is not null and state is not null) (type: boolean) - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (locid is not null and state is not null) (type: boolean) - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: 
++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint), _col3 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: int) - 1 _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from emp_n2 left outer join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname -PREHOOK: 
type: QUERY -PREHOOK: Input: default@dept_n1 -PREHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from emp_n2 left outer join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: emp_n2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int), locid (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: dept_n1 - filterExpr: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 570 
Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from emp_n2 left semi join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname -PREHOOK: type: QUERY -PREHOOK: Input: default@dept_n1 -PREHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from emp_n2 left semi join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 
(SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: emp_n2 - filterExpr: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int), locid (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: dept_n1 - filterExpr: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptname (type: string), deptid (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: int) - minReductionHashAggr: 0.16666669 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort 
order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: int) - 1 _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from emp_n2 right outer join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname -PREHOOK: type: QUERY -PREHOOK: Input: default@dept_n1 -PREHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from emp_n2 right outer join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: emp_n2 - filterExpr: (deptid is not null and 
lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int), locid (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: dept_n1 - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - 
Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select * from emp_n2 full outer join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname -PREHOOK: type: QUERY -PREHOOK: Input: default@dept_n1 -PREHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from emp_n2 full outer join dept_n1 on emp_n2.deptid = dept_n1.deptid and emp_n2.lastname = dept_n1.deptname and dept_n1.deptname = emp_n2.lastname -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_n1 -POSTHOOK: Input: default@emp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: emp_n2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: lastname (type: string), deptid (type: int), locid (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Execution mode: vectorized, llap - LLAP 
IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: dept_n1 - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/llap/annotate_stats_join_pkfk.q.out deleted file mode 100644 index 0b1d492f67..0000000000 --- a/ql/src/test/results/clientpositive/llap/annotate_stats_join_pkfk.q.out +++ /dev/null @@ -1,1549 +0,0 @@ -PREHOOK: query: drop table store_sales_n0 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table store_sales_n0 -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table store_n0 -PREHOOK: type: DROPTABLE 
-POSTHOOK: query: drop table store_n0 -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table customer_address -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table customer_address -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table store_sales_n0 -( - ss_sold_date_sk int, - ss_sold_time_sk int, - ss_item_sk int, - ss_customer_sk int, - ss_cdemo_sk int, - ss_hdemo_sk int, - ss_addr_sk int, - ss_store_sk int, - ss_promo_sk int, - ss_ticket_number int, - ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float -) -row format delimited fields terminated by '|' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@store_sales_n0 -POSTHOOK: query: create table store_sales_n0 -( - ss_sold_date_sk int, - ss_sold_time_sk int, - ss_item_sk int, - ss_customer_sk int, - ss_cdemo_sk int, - ss_hdemo_sk int, - ss_addr_sk int, - ss_store_sk int, - ss_promo_sk int, - ss_ticket_number int, - ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float -) -row format delimited fields terminated by '|' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@store_sales_n0 -PREHOOK: query: create table store_n0 -( - s_store_sk int, - s_store_id string, - s_rec_start_date string, - s_rec_end_date string, - s_closed_date_sk int, - s_store_name string, - s_number_employees int, - s_floor_space int, - s_hours string, - s_manager string, - s_market_id int, - s_geography_class string, - s_market_desc 
string, - s_market_manager string, - s_division_id int, - s_division_name string, - s_company_id int, - s_company_name string, - s_street_number string, - s_street_name string, - s_street_type string, - s_suite_number string, - s_city string, - s_county string, - s_state string, - s_zip string, - s_country string, - s_gmt_offset float, - s_tax_precentage float -) -row format delimited fields terminated by '|' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@store_n0 -POSTHOOK: query: create table store_n0 -( - s_store_sk int, - s_store_id string, - s_rec_start_date string, - s_rec_end_date string, - s_closed_date_sk int, - s_store_name string, - s_number_employees int, - s_floor_space int, - s_hours string, - s_manager string, - s_market_id int, - s_geography_class string, - s_market_desc string, - s_market_manager string, - s_division_id int, - s_division_name string, - s_company_id int, - s_company_name string, - s_street_number string, - s_street_name string, - s_street_type string, - s_suite_number string, - s_city string, - s_county string, - s_state string, - s_zip string, - s_country string, - s_gmt_offset float, - s_tax_precentage float -) -row format delimited fields terminated by '|' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@store_n0 -PREHOOK: query: create table store_bigint -( - s_store_sk bigint, - s_store_id string, - s_rec_start_date string, - s_rec_end_date string, - s_closed_date_sk int, - s_store_name string, - s_number_employees int, - s_floor_space int, - s_hours string, - s_manager string, - s_market_id int, - s_geography_class string, - s_market_desc string, - s_market_manager string, - s_division_id int, - s_division_name string, - s_company_id int, - s_company_name string, - s_street_number string, - s_street_name string, - s_street_type string, - s_suite_number string, - s_city string, - s_county string, - s_state string, - s_zip string, - s_country 
string, - s_gmt_offset float, - s_tax_precentage float -) -row format delimited fields terminated by '|' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@store_bigint -POSTHOOK: query: create table store_bigint -( - s_store_sk bigint, - s_store_id string, - s_rec_start_date string, - s_rec_end_date string, - s_closed_date_sk int, - s_store_name string, - s_number_employees int, - s_floor_space int, - s_hours string, - s_manager string, - s_market_id int, - s_geography_class string, - s_market_desc string, - s_market_manager string, - s_division_id int, - s_division_name string, - s_company_id int, - s_company_name string, - s_street_number string, - s_street_name string, - s_street_type string, - s_suite_number string, - s_city string, - s_county string, - s_state string, - s_zip string, - s_country string, - s_gmt_offset float, - s_tax_precentage float -) -row format delimited fields terminated by '|' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@store_bigint -PREHOOK: query: create table customer_address -( - ca_address_sk int, - ca_address_id string, - ca_street_number string, - ca_street_name string, - ca_street_type string, - ca_suite_number string, - ca_city string, - ca_county string, - ca_state string, - ca_zip string, - ca_country string, - ca_gmt_offset float, - ca_location_type string -) -row format delimited fields terminated by '|' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@customer_address -POSTHOOK: query: create table customer_address -( - ca_address_sk int, - ca_address_id string, - ca_street_number string, - ca_street_name string, - ca_street_type string, - ca_suite_number string, - ca_city string, - ca_county string, - ca_state string, - ca_zip string, - ca_country string, - ca_gmt_offset float, - ca_location_type string -) -row format delimited fields terminated by '|' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: 
database:default -POSTHOOK: Output: default@customer_address -PREHOOK: query: load data local inpath '../../data/files/store.txt' overwrite into table store_n0 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@store_n0 -POSTHOOK: query: load data local inpath '../../data/files/store.txt' overwrite into table store_n0 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@store_n0 -PREHOOK: query: load data local inpath '../../data/files/store.txt' overwrite into table store_bigint -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@store_bigint -POSTHOOK: query: load data local inpath '../../data/files/store.txt' overwrite into table store_bigint -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@store_bigint -PREHOOK: query: load data local inpath '../../data/files/store_sales.txt' overwrite into table store_sales_n0 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@store_sales_n0 -POSTHOOK: query: load data local inpath '../../data/files/store_sales.txt' overwrite into table store_sales_n0 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@store_sales_n0 -PREHOOK: query: load data local inpath '../../data/files/customer_address.txt' overwrite into table customer_address -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@customer_address -POSTHOOK: query: load data local inpath '../../data/files/customer_address.txt' overwrite into table customer_address -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@customer_address -PREHOOK: query: analyze table store_n0 compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@store_n0 -PREHOOK: Output: default@store_n0 -POSTHOOK: query: analyze table store_n0 compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_n0 -POSTHOOK: 
Output: default@store_n0 -PREHOOK: query: analyze table store_n0 compute statistics for columns s_store_sk, s_floor_space -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@store_n0 -PREHOOK: Output: default@store_n0 -#### A masked pattern was here #### -POSTHOOK: query: analyze table store_n0 compute statistics for columns s_store_sk, s_floor_space -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@store_n0 -POSTHOOK: Output: default@store_n0 -#### A masked pattern was here #### -PREHOOK: query: analyze table store_bigint compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@store_bigint -PREHOOK: Output: default@store_bigint -POSTHOOK: query: analyze table store_bigint compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_bigint -POSTHOOK: Output: default@store_bigint -PREHOOK: query: analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@store_bigint -PREHOOK: Output: default@store_bigint -#### A masked pattern was here #### -POSTHOOK: query: analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@store_bigint -POSTHOOK: Output: default@store_bigint -#### A masked pattern was here #### -PREHOOK: query: analyze table store_sales_n0 compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@store_sales_n0 -PREHOOK: Output: default@store_sales_n0 -POSTHOOK: query: analyze table store_sales_n0 compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_sales_n0 -POSTHOOK: Output: default@store_sales_n0 -PREHOOK: query: analyze table store_sales_n0 compute statistics for columns ss_store_sk, ss_addr_sk, ss_quantity -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@store_sales_n0 -PREHOOK: Output: default@store_sales_n0 -#### A masked pattern was here #### -POSTHOOK: query: analyze table store_sales_n0 compute statistics for columns 
ss_store_sk, ss_addr_sk, ss_quantity -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@store_sales_n0 -POSTHOOK: Output: default@store_sales_n0 -#### A masked pattern was here #### -PREHOOK: query: analyze table customer_address compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@customer_address -PREHOOK: Output: default@customer_address -POSTHOOK: query: analyze table customer_address compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@customer_address -POSTHOOK: Output: default@customer_address -PREHOOK: query: analyze table customer_address compute statistics for columns ca_address_sk -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@customer_address -PREHOOK: Output: default@customer_address -#### A masked pattern was here #### -POSTHOOK: query: analyze table customer_address compute statistics for columns ca_address_sk -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@customer_address -POSTHOOK: Output: default@customer_address -#### A masked pattern was here #### -PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) -PREHOOK: type: QUERY -PREHOOK: Input: default@store_n0 -PREHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_n0 -POSTHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - filterExpr: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: 
COMPLETE - Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: ss - filterExpr: ss_store_sk is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ss_store_sk is not null (type: boolean) - Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select s.s_store_sk from store_bigint s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) -PREHOOK: type: QUERY -PREHOOK: Input: default@store_bigint -PREHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select s.s_store_sk from store_bigint s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_bigint -POSTHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - filterExpr: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: ss - filterExpr: UDFToLong(ss_store_sk) is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE 
Column stats: COMPLETE - Filter Operator - predicate: UDFToLong(ss_store_sk) is not null (type: boolean) - Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToLong(ss_store_sk) (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 964 Data size: 7440 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 964 Data size: 7440 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 964 Data size: 7712 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 964 Data size: 7712 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@store_n0 -PREHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_n0 -POSTHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - 
Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - filterExpr: (s_store_sk > 0) (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (s_store_sk > 0) (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: ss - filterExpr: (ss_store_sk > 0) (type: boolean) - Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ss_store_sk > 0) (type: boolean) - Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: 
int) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where s.s_company_id > 0 and ss.ss_quantity > 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@store_n0 -PREHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where s.s_company_id > 0 and ss.ss_quantity > 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_n0 -POSTHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - filterExpr: ((s_company_id > 0) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Filter Operator - predicate: ((s_company_id > 0) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output 
Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: PARTIAL - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: ss - filterExpr: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 876 Data size: 6724 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 876 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 876 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 90 Data size: 360 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 90 Data size: 360 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where s.s_floor_space > 0 
-PREHOOK: type: QUERY -PREHOOK: Input: default@store_n0 -PREHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where s.s_floor_space > 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_n0 -POSTHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - filterExpr: ((s_floor_space > 0) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((s_floor_space > 0) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: ss - filterExpr: ss_store_sk is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ss_store_sk is not null (type: boolean) - Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_store_sk (type: int) - outputColumnNames: _col0 - 
Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where ss.ss_quantity > 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@store_n0 -PREHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) where ss.ss_quantity > 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_n0 -POSTHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - filterExpr: s_store_sk 
is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: ss - filterExpr: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 876 Data size: 6724 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 876 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 876 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 876 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 876 Data size: 3504 Basic stats: COMPLETE 
Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) -PREHOOK: type: QUERY -PREHOOK: Input: default@store_n0 -PREHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_n0 -POSTHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: ss - filterExpr: ss_store_sk is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ss_store_sk is not null (type: boolean) - Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 964 Data size: 3720 Basic stats: 
COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: s - filterExpr: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 964 Data size: 7576 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 964 Data size: 7576 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE - Select 
Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 1000 -PREHOOK: type: QUERY -PREHOOK: Input: default@store_n0 -PREHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 1000 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_n0 -POSTHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: ss - filterExpr: (ss_store_sk > 1000) (type: boolean) - Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ss_store_sk > 1000) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_store_sk (type: int) - 
outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: s - filterExpr: (s_store_sk > 1000) (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (s_store_sk > 1000) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 3 - 
Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) where s.s_floor_space > 1000 -PREHOOK: type: QUERY -PREHOOK: Input: default@store_n0 -PREHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) where s.s_floor_space > 1000 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_n0 -POSTHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: ss - filterExpr: ss_store_sk is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 
3860 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ss_store_sk is not null (type: boolean) - Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: s - filterExpr: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: s1 - filterExpr: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12 
Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 964 Data size: 7576 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 964 Data size: 7576 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) where ss.ss_quantity > 10 
-PREHOOK: type: QUERY -PREHOOK: Input: default@store_n0 -PREHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join store_n0 s1 on (s1.s_store_sk = ss.ss_store_sk) where ss.ss_quantity > 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_n0 -POSTHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: ss - filterExpr: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 876 Data size: 6724 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 876 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 876 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: s - filterExpr: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE 
Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 876 Data size: 6884 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 876 Data size: 6884 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 876 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 876 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 876 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join customer_address ca on (ca.ca_address_sk = ss.ss_addr_sk) -PREHOOK: type: QUERY -PREHOOK: Input: default@customer_address -PREHOOK: Input: default@store_n0 -PREHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select s.s_store_sk from store_n0 s join store_sales_n0 ss on (s.s_store_sk = ss.ss_store_sk) join customer_address ca on (ca.ca_address_sk = ss.ss_addr_sk) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@customer_address -POSTHOOK: Input: default@store_n0 -POSTHOOK: Input: default@store_sales_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: ss - filterExpr: (ss_addr_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 7664 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ss_addr_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_addr_sk (type: int), ss_store_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: 
_col1 (type: int) - Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: s - filterExpr: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: ca - filterExpr: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ca_address_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - 
outputColumnNames: _col0, _col2 - Statistics: Num rows: 916 Data size: 7148 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 916 Data size: 7148 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2 - Statistics: Num rows: 223 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 223 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 223 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: drop table store_sales_n0 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@store_sales_n0 -PREHOOK: Output: default@store_sales_n0 -POSTHOOK: query: drop table store_sales_n0 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@store_sales_n0 -POSTHOOK: Output: default@store_sales_n0 -PREHOOK: query: drop table store_n0 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@store_n0 -PREHOOK: Output: default@store_n0 -POSTHOOK: query: drop table store_n0 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@store_n0 -POSTHOOK: Output: default@store_n0 -PREHOOK: query: drop table store_bigint -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@store_bigint -PREHOOK: Output: default@store_bigint -POSTHOOK: 
query: drop table store_bigint -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@store_bigint -POSTHOOK: Output: default@store_bigint -PREHOOK: query: drop table customer_address -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@customer_address -PREHOOK: Output: default@customer_address -POSTHOOK: query: drop table customer_address -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@customer_address -POSTHOOK: Output: default@customer_address diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_udtf.q.out b/ql/src/test/results/clientpositive/llap/annotate_stats_udtf.q.out deleted file mode 100644 index 6413ea4e5c..0000000000 --- a/ql/src/test/results/clientpositive/llap/annotate_stats_udtf.q.out +++ /dev/null @@ -1,184 +0,0 @@ -PREHOOK: query: drop table if exists HIVE_20262 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists HIVE_20262 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table HIVE_20262 (a array) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@HIVE_20262 -POSTHOOK: query: create table HIVE_20262 (a array) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@HIVE_20262 -PREHOOK: query: insert into HIVE_20262 select array(1) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@hive_20262 -POSTHOOK: query: insert into HIVE_20262 select array(1) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@hive_20262 -POSTHOOK: Lineage: hive_20262.a EXPRESSION [] -PREHOOK: query: insert into HIVE_20262 select array(2) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@hive_20262 -POSTHOOK: query: insert into HIVE_20262 select array(2) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@hive_20262 -POSTHOOK: Lineage: hive_20262.a EXPRESSION [] -PREHOOK: query: explain select 
explode(array(1,2,3,4,5)) as col -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -POSTHOOK: query: explain select explode(array(1,2,3,4,5)) as col -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Select Operator - expressions: array(1,2,3,4,5) (type: array) - outputColumnNames: _col0 - UDTF Operator - function name: explode - Select Operator - expressions: col (type: int) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain select explode(a) from HIVE_20262 -PREHOOK: type: QUERY -PREHOOK: Input: default@hive_20262 -#### A masked pattern was here #### -POSTHOOK: query: explain select explode(a) from HIVE_20262 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hive_20262 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: hive_20262 - Select Operator - expressions: a (type: array) - outputColumnNames: _col0 - UDTF Operator - function name: explode - Select Operator - expressions: col (type: int) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain select 1, r from HIVE_20262 - lateral view explode(a) t as r -PREHOOK: type: QUERY -PREHOOK: Input: default@hive_20262 -#### A masked pattern was here #### -POSTHOOK: query: explain select 1, r from HIVE_20262 - lateral view explode(a) t as r -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hive_20262 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: hive_20262 - Lateral View Forward - Select Operator - Lateral View Join Operator - 
outputColumnNames: _col4 - Select Operator - expressions: 1 (type: int), _col4 (type: int) - outputColumnNames: _col0, _col1 - ListSink - Select Operator - expressions: a (type: array) - outputColumnNames: _col0 - UDTF Operator - function name: explode - Lateral View Join Operator - outputColumnNames: _col4 - Select Operator - expressions: 1 (type: int), _col4 (type: int) - outputColumnNames: _col0, _col1 - ListSink - -PREHOOK: query: explain select explode(array(1,2,3,4,5)) as col -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -POSTHOOK: query: explain select explode(array(1,2,3,4,5)) as col -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Select Operator - expressions: array(1,2,3,4,5) (type: array) - outputColumnNames: _col0 - UDTF Operator - function name: explode - Select Operator - expressions: col (type: int) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain select explode(a) from HIVE_20262 -PREHOOK: type: QUERY -PREHOOK: Input: default@hive_20262 -#### A masked pattern was here #### -POSTHOOK: query: explain select explode(a) from HIVE_20262 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hive_20262 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: hive_20262 - Select Operator - expressions: a (type: array) - outputColumnNames: _col0 - UDTF Operator - function name: explode - Select Operator - expressions: col (type: int) - outputColumnNames: _col0 - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/ansi_sql_arithmetic.q.out b/ql/src/test/results/clientpositive/llap/ansi_sql_arithmetic.q.out 
deleted file mode 100644 index 6e77cbac71..0000000000 --- a/ql/src/test/results/clientpositive/llap/ansi_sql_arithmetic.q.out +++ /dev/null @@ -1,68 +0,0 @@ -PREHOOK: query: explain select cast(key as int) / cast(key as int) from src limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select cast(key as int) / cast(key as int) from src limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 1 - Processor Tree: - TableScan - alias: src - Select Operator - expressions: (CAST( UDFToInteger(key) AS decimal(10,0)) / CAST( UDFToInteger(key) AS decimal(10,0))) (type: decimal(21,11)) - outputColumnNames: _col0 - Limit - Number of rows: 1 - ListSink - -PREHOOK: query: select cast(key as int) / cast(key as int) from src limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select cast(key as int) / cast(key as int) from src limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -1.00000000000 -PREHOOK: query: explain select cast(key as int) / cast(key as int) from src limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select cast(key as int) / cast(key as int) from src limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 1 - Processor Tree: - TableScan - alias: src - Select Operator - expressions: (UDFToDouble(UDFToInteger(key)) / UDFToDouble(UDFToInteger(key))) (type: double) - outputColumnNames: _col0 - Limit - Number of rows: 1 - ListSink - -PREHOOK: query: select cast(key as int) / cast(key as int) from src limit 1 -PREHOOK: type: QUERY 
-PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select cast(key as int) / cast(key as int) from src limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -1.0 diff --git a/ql/src/test/results/clientpositive/llap/array_size_estimation.q.out b/ql/src/test/results/clientpositive/llap/array_size_estimation.q.out deleted file mode 100644 index ef11474b27..0000000000 --- a/ql/src/test/results/clientpositive/llap/array_size_estimation.q.out +++ /dev/null @@ -1,117 +0,0 @@ -PREHOOK: query: create table t_n19 (col string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t_n19 -POSTHOOK: query: create table t_n19 (col string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t_n19 -PREHOOK: query: insert into t_n19 values ('x') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@t_n19 -POSTHOOK: query: insert into t_n19 values ('x') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@t_n19 -POSTHOOK: Lineage: t_n19.col SCRIPT [] -PREHOOK: query: explain -select array("b", "d", "c", "a") FROM t_n19 -PREHOOK: type: QUERY -PREHOOK: Input: default@t_n19 -#### A masked pattern was here #### -POSTHOOK: query: explain -select array("b", "d", "c", "a") FROM t_n19 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t_n19 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: t_n19 - Select Operator - expressions: array('b','d','c','a') (type: array) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain -select array("b", "d", "c", col) FROM t_n19 -PREHOOK: type: QUERY -PREHOOK: Input: default@t_n19 -#### A masked pattern was here #### -POSTHOOK: query: explain -select array("b", "d", "c", col) FROM t_n19 
-POSTHOOK: type: QUERY -POSTHOOK: Input: default@t_n19 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: t_n19 - Select Operator - expressions: array('b','d','c',col) (type: array) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain -select sort_array(array("b", "d", "c", "a")),array("1","2") FROM t_n19 -PREHOOK: type: QUERY -PREHOOK: Input: default@t_n19 -#### A masked pattern was here #### -POSTHOOK: query: explain -select sort_array(array("b", "d", "c", "a")),array("1","2") FROM t_n19 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t_n19 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: t_n19 - Select Operator - expressions: sort_array(array('b','d','c','a')) (type: array), array('1','2') (type: array) - outputColumnNames: _col0, _col1 - ListSink - -PREHOOK: query: explain -select sort_array(array("b", "d", "c", col)),array("1","2") FROM t_n19 -PREHOOK: type: QUERY -PREHOOK: Input: default@t_n19 -#### A masked pattern was here #### -POSTHOOK: query: explain -select sort_array(array("b", "d", "c", col)),array("1","2") FROM t_n19 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t_n19 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: t_n19 - Select Operator - expressions: sort_array(array('b','d','c',col)) (type: array), array('1','2') (type: array) - outputColumnNames: _col0, _col1 - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_5a.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_5a.q.out deleted file mode 100644 index f4c96f9ade..0000000000 --- 
a/ql/src/test/results/clientpositive/llap/autoColumnStats_5a.q.out +++ /dev/null @@ -1,708 +0,0 @@ -PREHOOK: query: CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@partitioned1 -POSTHOOK: query: CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@partitioned1 -PREHOOK: query: explain extended -insert into table partitioned1 partition(part=1) values(1, 'original') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@partitioned1@part=1 -POSTHOOK: query: explain extended -insert into table partitioned1 partition(part=1) values(1, 'original') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@partitioned1@part=1 -Explain -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: array(const struct(1,'original')) (type: array>) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Select Operator - expressions: col1 (type: int), col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - 
File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: part=1/ - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns a,b - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.partitioned1 - partition_columns part - partition_columns.types int - serialization.ddl struct partitioned1 { i32 a, string b} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.partitioned1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: int), _col1 (type: string), UDFToInteger('1') (type: int) - outputColumnNames: a, b, part - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') - keys: part (type: int) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked 
pattern was here #### - Partition - base file name: dummy_path - input format: org.apache.hadoop.hive.ql.io.NullRowsInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns - columns.comments - columns.types -#### A masked pattern was here #### - name _dummy_database._dummy_table - serialization.ddl struct _dummy_table { } - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.hive.ql.io.NullRowsInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns - columns.comments - columns.types -#### A masked pattern was here #### - name _dummy_database._dummy_table - serialization.ddl struct _dummy_table { } - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - name: _dummy_database._dummy_table - name: _dummy_database._dummy_table - Truncated Path -> Alias: -#### A masked pattern was here #### - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 884 
Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types struct:struct:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - part 1 - replace: false -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns a,b - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.partitioned1 - partition_columns part - partition_columns.types int - serialization.ddl struct partitioned1 { i32 a, string b} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.partitioned1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: -#### A masked pattern was here #### - Column Stats Desc: - Columns: a, b - Column Types: int, string - Table: default.partitioned1 - Is Table Level Stats: false - -PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@partitioned1@part=1 -POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original') -POSTHOOK: 
type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@partitioned1@part=1 -POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a SCRIPT [] -POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SCRIPT [] -col1 col2 -PREHOOK: query: desc formatted partitioned1 partition(part=1) -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@partitioned1 -POSTHOOK: query: desc formatted partitioned1 partition(part=1) -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@partitioned1 -col_name data_type comment -# col_name data_type comment -a int -b string - -# Partition Information -# col_name data_type comment -part int - -# Detailed Partition Information -Partition Value: [1] -Database: default -Table: partitioned1 -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} - numFiles 1 - numRows 1 - rawDataSize 10 - totalSize 11 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: explain extended -insert into table partitioned1 partition(part=1) values(2, 'original'), (3, 'original'),(4, 'original') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@partitioned1@part=1 -POSTHOOK: query: explain extended -insert into table partitioned1 partition(part=1) values(2, 'original'), (3, 'original'),(4, 'original') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@partitioned1@part=1 -Explain -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on 
stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: array(const struct(2,'original'),const struct(3,'original'),const struct(4,'original')) (type: array>) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Select Operator - expressions: col1 (type: int), col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: part=1/ - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns a,b - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.partitioned1 - partition_columns part - partition_columns.types int - serialization.ddl struct partitioned1 { i32 a, string b} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.partitioned1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - 
expressions: _col0 (type: int), _col1 (type: string), UDFToInteger('1') (type: int) - outputColumnNames: a, b, part - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') - keys: part (type: int) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col1 (type: struct), _col2 (type: struct) - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: dummy_path - input format: org.apache.hadoop.hive.ql.io.NullRowsInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns - columns.comments - columns.types -#### A masked pattern was here #### - name _dummy_database._dummy_table - serialization.ddl struct _dummy_table { } - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.hive.ql.io.NullRowsInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns - columns.comments - columns.types -#### A masked pattern was here #### - name _dummy_database._dummy_table - serialization.ddl struct _dummy_table { } - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.NullStructSerDe - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - name: _dummy_database._dummy_table - name: _dummy_database._dummy_table - Truncated Path -> Alias: -#### A masked pattern was here #### - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types struct:struct:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - part 1 - replace: false -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - 
columns a,b - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.partitioned1 - partition_columns part - partition_columns.types int - serialization.ddl struct partitioned1 { i32 a, string b} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.partitioned1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: -#### A masked pattern was here #### - Column Stats Desc: - Columns: a, b - Column Types: int, string - Table: default.partitioned1 - Is Table Level Stats: false - -PREHOOK: query: insert into table partitioned1 partition(part=1) values(2, 'original'), (3, 'original'),(4, 'original') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@partitioned1@part=1 -POSTHOOK: query: insert into table partitioned1 partition(part=1) values(2, 'original'), (3, 'original'),(4, 'original') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@partitioned1@part=1 -POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a SCRIPT [] -POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SCRIPT [] -col1 col2 -PREHOOK: query: explain insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@partitioned1@part=1 -POSTHOOK: query: explain insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@partitioned1@part=1 -Explain -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: 
Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array(const struct(1,'original'),const struct(2,'original'),const struct(3,'original'),const struct(4,'original')) (type: array>) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Select Operator - expressions: col1 (type: int), col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.partitioned1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), UDFToInteger('1') (type: int) - outputColumnNames: a, b, part - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') - keys: part (type: int) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 868 Basic stats: COMPLETE Column stats: COMPLETE - 
value expressions: _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - part 1 - replace: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.partitioned1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: a, b - Column Types: int, string - Table: default.partitioned1 - -PREHOOK: query: desc formatted partitioned1 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@partitioned1 -POSTHOOK: query: desc formatted partitioned1 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@partitioned1 -col_name data_type comment -# col_name data_type comment -a int -b string - -# Partition Information -# col_name data_type comment -part int - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here 
#### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - bucketing_version 2 - numFiles 2 - numPartitions 1 - numRows 4 - rawDataSize 40 - totalSize 44 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: desc formatted partitioned1 partition(part=1) -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@partitioned1 -POSTHOOK: query: desc formatted partitioned1 partition(part=1) -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@partitioned1 -col_name data_type comment -# col_name data_type comment -a int -b string - -# Partition Information -# col_name data_type comment -part int - -# Detailed Partition Information -Partition Value: [1] -Database: default -Table: partitioned1 -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} - numFiles 2 - numRows 4 - rawDataSize 40 - totalSize 44 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: desc formatted partitioned1 partition(part=1) a -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@partitioned1 -POSTHOOK: query: desc formatted partitioned1 partition(part=1) a -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@partitioned1 -column_property value -col_name a -data_type int -min 1 -max 4 
-num_nulls 0 -distinct_count 4 -avg_col_len -max_col_len -num_trues -num_falses -bit_vector HL -comment from deserializer diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_7.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_7.q.out deleted file mode 100644 index 797ccdd911..0000000000 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_7.q.out +++ /dev/null @@ -1,206 +0,0 @@ -PREHOOK: query: CREATE TABLE dest_g2_n5(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest_g2_n5 -POSTHOOK: query: CREATE TABLE dest_g2_n5(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest_g2_n5 -PREHOOK: query: CREATE TEMPORARY TABLE src_temp AS SELECT * FROM src -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@src_temp -POSTHOOK: query: CREATE TEMPORARY TABLE src_temp AS SELECT * FROM src -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_temp -PREHOOK: query: explain FROM src_temp -INSERT OVERWRITE TABLE dest_g2_n5 SELECT substr(src_temp.key,1,1), count(DISTINCT substr(src_temp.value,5)), concat(substr(src_temp.key,1,1),sum(substr(src_temp.value,5))) GROUP BY substr(src_temp.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src_temp -PREHOOK: Output: default@dest_g2_n5 -POSTHOOK: query: explain FROM src_temp -INSERT OVERWRITE TABLE dest_g2_n5 SELECT substr(src_temp.key,1,1), count(DISTINCT substr(src_temp.value,5)), concat(substr(src_temp.key,1,1),sum(substr(src_temp.value,5))) GROUP BY substr(src_temp.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_temp -POSTHOOK: Output: default@dest_g2_n5 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - 
Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src_temp - Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: double) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: 
NONE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g2_n5 - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - outputColumnNames: key, c1, c2 - Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), c1 (type: int), c2 (type: string) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') - mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - File Output 
Operator - compressed: false - Statistics: Num rows: 1 Data size: 1688 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g2_n5 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, c1, c2 - Column Types: string, int, string - Table: default.dest_g2_n5 - -PREHOOK: query: FROM src_temp -INSERT OVERWRITE TABLE dest_g2_n5 SELECT substr(src_temp.key,1,1), count(DISTINCT substr(src_temp.value,5)), concat(substr(src_temp.key,1,1),sum(substr(src_temp.value,5))) GROUP BY substr(src_temp.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src_temp -PREHOOK: Output: default@dest_g2_n5 -POSTHOOK: query: FROM src_temp -INSERT OVERWRITE TABLE dest_g2_n5 SELECT substr(src_temp.key,1,1), count(DISTINCT substr(src_temp.value,5)), concat(substr(src_temp.key,1,1),sum(substr(src_temp.value,5))) GROUP BY substr(src_temp.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_temp -POSTHOOK: Output: default@dest_g2_n5 -POSTHOOK: Lineage: dest_g2_n5.c1 EXPRESSION [(src_temp)src_temp.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: dest_g2_n5.c2 EXPRESSION [(src_temp)src_temp.FieldSchema(name:key, type:string, comment:null), (src_temp)src_temp.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: dest_g2_n5.key EXPRESSION [(src_temp)src_temp.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT dest_g2_n5.* FROM dest_g2_n5 -PREHOOK: type: QUERY -PREHOOK: Input: 
default@dest_g2_n5 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest_g2_n5.* FROM dest_g2_n5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_g2_n5 -#### A masked pattern was here #### -1 71 116414.0 -3 62 332004.0 -4 74 452763.0 -6 5 6398.0 -0 1 00.0 -2 69 225571.0 -5 6 5397.0 -7 6 7735.0 -8 8 8762.0 -9 7 91047.0 -PREHOOK: query: DROP TABLE dest_g2_n5 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@dest_g2_n5 -PREHOOK: Output: default@dest_g2_n5 -POSTHOOK: query: DROP TABLE dest_g2_n5 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@dest_g2_n5 -POSTHOOK: Output: default@dest_g2_n5 -PREHOOK: query: DROP TABLE src_temp -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@src_temp -PREHOOK: Output: default@src_temp -POSTHOOK: query: DROP TABLE src_temp -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@src_temp -POSTHOOK: Output: default@src_temp diff --git a/ql/src/test/results/clientpositive/llap/autoColumnStats_9.q.out b/ql/src/test/results/clientpositive/llap/autoColumnStats_9.q.out deleted file mode 100644 index d3542c947f..0000000000 --- a/ql/src/test/results/clientpositive/llap/autoColumnStats_9.q.out +++ /dev/null @@ -1,247 +0,0 @@ -PREHOOK: query: CREATE TABLE dest_j1_n23(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest_j1_n23 -POSTHOOK: query: CREATE TABLE dest_j1_n23(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest_j1_n23 -PREHOOK: query: EXPLAIN -FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest_j1_n23 SELECT src1.key, src2.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest_j1_n23 -POSTHOOK: query: EXPLAIN -FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest_j1_n23 SELECT src1.key, src2.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: 
Output: default@dest_j1_n23 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE 
Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n23 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: 
Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n23 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest_j1_n23 - -PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest_j1_n23 SELECT src1.key, src2.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest_j1_n23 -POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest_j1_n23 SELECT src1.key, src2.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest_j1_n23 -POSTHOOK: Lineage: dest_j1_n23.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1_n23.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select 'cnt, check desc',count(*) from dest_j1_n23 group by key*key >= 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_j1_n23 -#### A masked pattern was here #### -POSTHOOK: query: select 'cnt, check desc',count(*) from dest_j1_n23 group by key*key >= 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_j1_n23 -#### A masked pattern was here #### -cnt, check desc 1028 -PREHOOK: query: desc formatted dest_j1_n23 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@dest_j1_n23 -POSTHOOK: query: desc formatted dest_j1_n23 
-POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@dest_j1_n23 -# col_name data_type comment -key int -value string - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} - bucketing_version 2 - numFiles 1 - numRows 1028 - rawDataSize 10968 - totalSize 11996 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: desc formatted dest_j1_n23 key -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@dest_j1_n23 -POSTHOOK: query: desc formatted dest_j1_n23 key -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@dest_j1_n23 -col_name key -data_type int -min 0 -max 498 -num_nulls 0 -distinct_count 303 -avg_col_len -max_col_len -num_trues -num_falses -bit_vector HL -comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} -PREHOOK: query: desc formatted dest_j1_n23 value -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@dest_j1_n23 -POSTHOOK: query: desc formatted dest_j1_n23 value -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@dest_j1_n23 -col_name value -data_type string -min -max -num_nulls 0 -distinct_count 307 -avg_col_len 6.834630350194552 -max_col_len 7 -num_trues -num_falses -bit_vector HL -comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} diff --git a/ql/src/test/results/clientpositive/llap/auto_join10.q.out 
b/ql/src/test/results/clientpositive/llap/auto_join10.q.out deleted file mode 100644 index a4291eeb91..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join10.q.out +++ /dev/null @@ -1,135 +0,0 @@ -PREHOOK: query: explain -FROM -(SELECT src.* FROM src) x -JOIN -(SELECT src.* FROM src) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -FROM -(SELECT src.* FROM src) x -JOIN -(SELECT src.* FROM src) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - 
predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col1,_col2) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM -(SELECT src.* FROM src) x -JOIN -(SELECT src.* FROM src) Y -ON (x.key = Y.key) 
-select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT src.* FROM src) x -JOIN -(SELECT src.* FROM src) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -103231310608 diff --git a/ql/src/test/results/clientpositive/llap/auto_join11.q.out b/ql/src/test/results/clientpositive/llap/auto_join11.q.out deleted file mode 100644 index 5a400efc4f..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join11.q.out +++ /dev/null @@ -1,135 +0,0 @@ -PREHOOK: query: explain -SELECT sum(hash(src1.c1, src2.c4)) -FROM -(SELECT src.key as c1, src.value as c2 from src) src1 -JOIN -(SELECT src.key as c3, src.value as c4 from src) src2 -ON src1.c1 = src2.c3 AND src1.c1 < 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT sum(hash(src1.c1, src2.c4)) -FROM -(SELECT src.key as c1, src.value as c2 from src) src1 -JOIN -(SELECT src.key as c3, src.value as c4 from src) src2 -ON src1.c1 = src2.c3 AND src1.c1 < 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - 
outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0,_col2) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: 
mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) -FROM -(SELECT src.key as c1, src.value as c2 from src) src1 -JOIN -(SELECT src.key as c3, src.value as c4 from src) src2 -ON src1.c1 = src2.c3 AND src1.c1 < 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) -FROM -(SELECT src.key as c1, src.value as c2 from src) src1 -JOIN -(SELECT src.key as c3, src.value as c4 from src) src2 -ON src1.c1 = src2.c3 AND src1.c1 < 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### --101333194320 diff --git a/ql/src/test/results/clientpositive/llap/auto_join12.q.out b/ql/src/test/results/clientpositive/llap/auto_join12.q.out deleted file mode 100644 index 98d6462ddb..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join12.q.out +++ /dev/null @@ -1,179 +0,0 @@ -PREHOOK: query: explain -SELECT sum(hash(src1.c1, src2.c4)) -FROM -(SELECT src.key as c1, src.value as c2 from src) src1 -JOIN -(SELECT src.key as c3, src.value as c4 from src) src2 -ON src1.c1 = src2.c3 AND src1.c1 < 100 -JOIN -(SELECT src.key as c5, src.value as c6 from src) src3 -ON src1.c1 = src3.c5 AND src3.c5 < 80 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT sum(hash(src1.c1, src2.c4)) -FROM -(SELECT src.key as c1, src.value as c2 from src) src1 -JOIN -(SELECT src.key 
as c3, src.value as c4 from src) src2 -ON src1.c1 = src2.c3 AND src1.c1 < 100 -JOIN -(SELECT src.key as c5, src.value as c6 from src) src3 -ON src1.c1 = src3.c5 AND src3.c5 < 80 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 80.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 80.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0,_col3) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 80.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 80.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 80.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 80.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP 
IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) -FROM -(SELECT src.key as c1, src.value as c2 from src) src1 -JOIN -(SELECT src.key as c3, src.value as c4 from src) src2 -ON src1.c1 = src2.c3 AND src1.c1 < 100 -JOIN -(SELECT src.key as c5, src.value as c6 from src) src3 -ON src1.c1 = src3.c5 AND src3.c5 < 80 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) -FROM -(SELECT src.key as c1, src.value as c2 from src) src1 -JOIN -(SELECT src.key as c3, src.value as c4 from src) src2 -ON src1.c1 = src2.c3 AND src1.c1 < 100 -JOIN -(SELECT src.key as c5, src.value as c6 from src) src3 -ON src1.c1 = src3.c5 AND src3.c5 < 80 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### --136843922952 diff --git a/ql/src/test/results/clientpositive/llap/auto_join13.q.out b/ql/src/test/results/clientpositive/llap/auto_join13.q.out deleted file mode 100644 index 1f4522ef33..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join13.q.out +++ /dev/null @@ -1,178 +0,0 @@ -PREHOOK: query: explain -SELECT sum(hash(src1.c1, src2.c4)) -FROM -(SELECT src.key as c1, src.value as c2 from src) src1 -JOIN -(SELECT src.key as c3, src.value as c4 from src) src2 -ON src1.c1 = src2.c3 AND 
src1.c1 < 100 -JOIN -(SELECT src.key as c5, src.value as c6 from src) src3 -ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT sum(hash(src1.c1, src2.c4)) -FROM -(SELECT src.key as c1, src.value as c2 from src) src1 -JOIN -(SELECT src.key as c3, src.value as c4 from src) src2 -ON src1.c1 = src2.c3 AND src1.c1 < 100 -JOIN -(SELECT src.key as c5, src.value as c6 from src) src3 -ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 166 Data size: 43990 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - 1 _col0 (type: double) - outputColumnNames: _col1, _col2 - input vertices: - 
1 Map 4 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col2,_col1) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 200.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 200.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToDouble(key) (type: double) - 
outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) -FROM -(SELECT src.key as c1, src.value as c2 from src) src1 -JOIN -(SELECT src.key as c3, src.value as c4 from src) src2 -ON src1.c1 = src2.c3 AND src1.c1 < 100 -JOIN -(SELECT src.key as c5, src.value as c6 from src) src3 -ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) -FROM -(SELECT src.key as c1, src.value as c2 from src) src1 -JOIN -(SELECT src.key as c3, src.value as c4 from src) src2 -ON src1.c1 = src2.c3 AND src1.c1 < 100 -JOIN -(SELECT src.key as c5, src.value as c6 from src) src3 -ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### --97670109576 diff --git a/ql/src/test/results/clientpositive/llap/auto_join14.q.out 
b/ql/src/test/results/clientpositive/llap/auto_join14.q.out deleted file mode 100644 index 708b0ea9cd..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join14.q.out +++ /dev/null @@ -1,178 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n83(c1 INT, c2 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n83 -POSTHOOK: query: CREATE TABLE dest1_n83(c1 INT, c2 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n83 -PREHOOK: query: explain -FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 -INSERT OVERWRITE TABLE dest1_n83 SELECT src.key, srcpart.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@dest1_n83 -POSTHOOK: query: explain -FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 -INSERT OVERWRITE TABLE dest1_n83 SELECT src.key, srcpart.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@dest1_n83 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: 
(UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 174 Data size: 30972 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 174 Data size: 16530 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 174 Data size: 16530 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
default.dest1_n83 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: c1, c2 - Statistics: Num rows: 174 Data size: 16530 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n83 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1, c2 - Column Types: int, string - Table: default.dest1_n83 - -PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 -INSERT OVERWRITE TABLE dest1_n83 SELECT src.key, srcpart.value 
-PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@dest1_n83 -POSTHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 -INSERT OVERWRITE TABLE dest1_n83 SELECT src.key, srcpart.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@dest1_n83 -POSTHOOK: Lineage: dest1_n83.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n83.c2 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1_n83.c1,dest1_n83.c2)) FROM dest1_n83 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n83 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1_n83.c1,dest1_n83.c2)) FROM dest1_n83 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n83 -#### A masked pattern was here #### -404554174174 diff --git a/ql/src/test/results/clientpositive/llap/auto_join15.q.out b/ql/src/test/results/clientpositive/llap/auto_join15.q.out deleted file mode 100644 index 83974ca7fe..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join15.q.out +++ /dev/null @@ -1,136 +0,0 @@ -PREHOOK: query: explain -select sum(hash(a.k1,a.v1,a.k2, a.v2)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 -FROM src src1 JOIN src src2 ON (src1.key = src2.key) -SORT BY k1, v1, k2, v2 -) a -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select sum(hash(a.k1,a.v1,a.k2, a.v2)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 -FROM src src1 JOIN src src2 ON 
(src1.key = src2.key) -SORT BY k1, v1, k2, v2 -) a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0,_col1,_col2,_col3) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 -FROM src src1 JOIN src src2 ON (src1.key = src2.key) -SORT BY k1, v1, k2, v2 -) a -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 -FROM src src1 JOIN src src2 ON (src1.key = src2.key) -SORT BY k1, v1, k2, v2 -) a -POSTHOOK: 
type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -524272996896 diff --git a/ql/src/test/results/clientpositive/llap/auto_join16.q.out b/ql/src/test/results/clientpositive/llap/auto_join16.q.out deleted file mode 100644 index c0bcb1942c..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join16.q.out +++ /dev/null @@ -1,135 +0,0 @@ -PREHOOK: query: explain -SELECT sum(hash(subq.key, tab.value)) -FROM -(select a.key, a.value from src a where a.key > 10 ) subq -JOIN src tab -ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) -where tab.value < 200 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT sum(hash(subq.key, tab.value)) -FROM -(select a.key, a.value from src a where a.key > 10 ) subq -JOIN src tab -ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) -where tab.value < 200 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(value) < 200.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(value) < 200.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 
to 1 - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0,_col3) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.9818182 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: tab - filterExpr: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(value) < 200.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(value) < 200.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: 
mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT sum(hash(subq.key, tab.value)) -FROM -(select a.key, a.value from src a where a.key > 10 ) subq -JOIN src tab -ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) -where tab.value < 200 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(subq.key, tab.value)) -FROM -(select a.key, a.value from src a where a.key > 10 ) subq -JOIN src tab -ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) -where tab.value < 200 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -NULL diff --git a/ql/src/test/results/clientpositive/llap/auto_join17.q.out b/ql/src/test/results/clientpositive/llap/auto_join17.q.out deleted file mode 100644 index e6500b52a5..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join17.q.out +++ /dev/null @@ -1,169 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n41(key1 INT, value1 STRING, key2 INT, value2 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n41 -POSTHOOK: query: CREATE TABLE dest1_n41(key1 INT, value1 STRING, key2 INT, value2 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n41 -PREHOOK: query: explain -FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n41 SELECT 
src1.*, src2.* -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n41 -POSTHOOK: query: explain -FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n41 SELECT src1.*, src2.* -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n41 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n41 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) - outputColumnNames: key1, value1, key2, value2 - Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: 
llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n41 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key1, value1, key2, value2 - Column Types: int, string, int, string - Table: default.dest1_n41 - -PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n41 SELECT src1.*, src2.* -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n41 -POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n41 SELECT src1.*, src2.* -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n41 -POSTHOOK: Lineage: dest1_n41.key1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n41.key2 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n41.value1 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n41.value2 SIMPLE 
[(src)src2.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1_n41.key1,dest1_n41.value1,dest1_n41.key2,dest1_n41.value2)) FROM dest1_n41 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n41 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1_n41.key1,dest1_n41.value1,dest1_n41.key2,dest1_n41.value2)) FROM dest1_n41 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n41 -#### A masked pattern was here #### --793937029770 diff --git a/ql/src/test/results/clientpositive/llap/auto_join18.q.out b/ql/src/test/results/clientpositive/llap/auto_join18.q.out deleted file mode 100644 index 505ce8ca5b..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join18.q.out +++ /dev/null @@ -1,211 +0,0 @@ -PREHOOK: query: explain - SELECT sum(hash(a.key, a.value, b.key, b.value)) - FROM - ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key - ) a - FULL OUTER JOIN - ( - SELECT src2.key as key, count(distinct(src2.value)) AS value - FROM src1 src2 group by src2.key - ) b - ON (a.key = b.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain - SELECT sum(hash(a.key, a.value, b.key, b.value)) - FROM - ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key - ) a - FULL OUTER JOIN - ( - SELECT src2.key as key, count(distinct(src2.value)) AS value - FROM src1 src2 group by src2.key - ) b - ON (a.key = b.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 
<- Map 5 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(value) - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: src2 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: 
count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 274 Data size: 2457 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0,_col1,_col2,_col3) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 274 Data size: 2457 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: 
vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT sum(hash(a.key, a.value, b.key, b.value)) - FROM - ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key - ) a - FULL OUTER JOIN - ( - SELECT src2.key as key, count(distinct(src2.value)) AS value - FROM src1 src2 group by src2.key - ) b - ON (a.key = b.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key, a.value, b.key, b.value)) - FROM - ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key - ) a - FULL OUTER JOIN - ( - SELECT src2.key as key, count(distinct(src2.value)) AS value - FROM src1 src2 group by src2.key - ) b - ON (a.key = b.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -379685492277 diff --git a/ql/src/test/results/clientpositive/llap/auto_join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/auto_join18_multi_distinct.q.out deleted file mode 100644 index c77bed7072..0000000000 --- 
a/ql/src/test/results/clientpositive/llap/auto_join18_multi_distinct.q.out +++ /dev/null @@ -1,219 +0,0 @@ -PREHOOK: query: explain - SELECT sum(hash(a.key, a.value, b.key, b.value1, b.value2)) - FROM - ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key - ) a - FULL OUTER JOIN - ( - SELECT src2.key as key, count(distinct(src2.value)) AS value1, - count(distinct(src2.key)) AS value2 - FROM src1 src2 group by src2.key - ) b - ON (a.key = b.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain - SELECT sum(hash(a.key, a.value, b.key, b.value1, b.value2)) - FROM - ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key - ) a - FULL OUTER JOIN - ( - SELECT src2.key as key, count(distinct(src2.value)) AS value1, - count(distinct(src2.key)) AS value2 - FROM src1 src2 group by src2.key - ) b - ON (a.key = b.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(value) - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: src2 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: 
llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 282 Data size: 3349 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0,_col1,_col2,_col3,_col4) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 282 Data size: 3349 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: CASE WHEN (((_col2 = 0L) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col2 = 1L) and _col0 is not null)) THEN (1) ELSE (null) END (type: 
int), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col0), count(_col1) - keys: _col2 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT sum(hash(a.key, a.value, b.key, b.value1, b.value2)) - FROM - ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key - ) a - FULL OUTER JOIN - ( - SELECT src2.key as key, count(distinct(src2.value)) AS value1, - count(distinct(src2.key)) AS value2 - FROM src1 src2 group by src2.key - ) b - ON (a.key = b.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key, a.value, b.key, b.value1, b.value2)) - FROM - ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key - ) a - FULL OUTER JOIN - ( - SELECT src2.key as key, count(distinct(src2.value)) AS value1, - count(distinct(src2.key)) AS value2 - FROM src1 src2 group by src2.key - ) b - ON (a.key = b.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -40694575227 diff --git a/ql/src/test/results/clientpositive/llap/auto_join19.q.out b/ql/src/test/results/clientpositive/llap/auto_join19.q.out deleted file mode 100644 index 93ab531bda..0000000000 --- 
a/ql/src/test/results/clientpositive/llap/auto_join19.q.out +++ /dev/null @@ -1,191 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n18(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n18 -POSTHOOK: query: CREATE TABLE dest1_n18(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n18 -PREHOOK: query: explain -FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n18 SELECT src1.key, src2.value -where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@dest1_n18 -POSTHOOK: query: explain -FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n18 SELECT src1.key, src2.value -where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@dest1_n18 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map 
Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 3164 Data size: 563192 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n18 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: 
struct) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n18 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - 
Column Types: int, string - Table: default.dest1_n18 - -PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n18 SELECT src1.key, src2.value -where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@dest1_n18 -POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n18 SELECT src1.key, src2.value -where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@dest1_n18 -POSTHOOK: Lineage: dest1_n18.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n18.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1_n18.key,dest1_n18.value)) FROM dest1_n18 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n18 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1_n18.key,dest1_n18.value)) FROM dest1_n18 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n18 -#### A masked pattern was here #### -407444119660 diff --git a/ql/src/test/results/clientpositive/llap/auto_join19_inclause.q.out b/ql/src/test/results/clientpositive/llap/auto_join19_inclause.q.out deleted file mode 100644 index abab9ae0ab..0000000000 --- 
a/ql/src/test/results/clientpositive/llap/auto_join19_inclause.q.out +++ /dev/null @@ -1,191 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n11(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n11 -POSTHOOK: query: CREATE TABLE dest1_n11(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n11 -PREHOOK: query: explain -FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n11 SELECT src1.key, src2.value -where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@dest1_n11 -POSTHOOK: query: explain -FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n11 SELECT src1.key, src2.value -where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@dest1_n11 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 
- Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 3164 Data size: 563192 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n11 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 3164 Data size: 300580 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 
(type: struct) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n11 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value 
- Column Types: int, string - Table: default.dest1_n11 - -PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n11 SELECT src1.key, src2.value -where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@dest1_n11 -POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n11 SELECT src1.key, src2.value -where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@dest1_n11 -POSTHOOK: Lineage: dest1_n11.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n11.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1_n11.key,dest1_n11.value)) FROM dest1_n11 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n11 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1_n11.key,dest1_n11.value)) FROM dest1_n11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n11 -#### A masked pattern was here #### -407444119660 diff --git a/ql/src/test/results/clientpositive/llap/auto_join2.q.out b/ql/src/test/results/clientpositive/llap/auto_join2.q.out deleted file mode 100644 index 9982478128..0000000000 --- 
a/ql/src/test/results/clientpositive/llap/auto_join2.q.out +++ /dev/null @@ -1,199 +0,0 @@ -PREHOOK: query: CREATE TABLE dest_j2(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest_j2 -POSTHOOK: query: CREATE TABLE dest_j2(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest_j2 -PREHOOK: query: explain -FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) -INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest_j2 -POSTHOOK: query: explain -FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) -INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest_j2 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column 
stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 791 Data size: 81473 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (_col1 + _col3) (type: double) - 1 _col1 (type: double) - outputColumnNames: _col0, _col4 - input vertices: - 1 Map 4 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 
500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src3 - filterExpr: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: double) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest_j2 - -PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) -INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest_j2 -POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) -INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest_j2 -POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j2.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_j2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_j2 -#### A masked pattern was here #### -33815990627 diff --git 
a/ql/src/test/results/clientpositive/llap/auto_join20.q.out b/ql/src/test/results/clientpositive/llap/auto_join20.q.out deleted file mode 100644 index 42292404a7..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join20.q.out +++ /dev/null @@ -1,336 +0,0 @@ -PREHOOK: query: explain -select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 91000 
Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {_col2} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 0 Map 1 - Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = 
src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -56157587016 -PREHOOK: query: explain -select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, 
llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {_col2} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 0 Map 1 - Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - 
Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -56157587016 diff --git a/ql/src/test/results/clientpositive/llap/auto_join22.q.out b/ql/src/test/results/clientpositive/llap/auto_join22.q.out deleted file mode 100644 index a7c9f81e86..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join22.q.out +++ /dev/null @@ -1,146 +0,0 @@ -PREHOOK: query: explain -SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 -POSTHOOK: 
type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 1251 Data size: 113841 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col1) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1251 Data size: 113841 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value 
expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src4 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 177500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 177500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator 
- compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -344337359100 diff --git a/ql/src/test/results/clientpositive/llap/auto_join23.q.out b/ql/src/test/results/clientpositive/llap/auto_join23.q.out deleted file mode 100644 index cf3cf1047c..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join23.q.out +++ /dev/null @@ -1,210 +0,0 @@ -Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: explain -SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value -POSTHOOK: type: QUERY -POSTHOOK: Input: 
default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized, 
llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - null sort order: zzzz - sort order: ++++ - Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 
val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 9 val_9 -0 val_0 9 val_9 -0 val_0 9 val_9 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 2 val_2 -2 val_2 4 val_4 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 8 val_8 -2 val_2 9 val_9 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 2 val_2 -4 val_4 4 val_4 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 8 val_8 -4 val_4 9 val_9 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 9 val_9 -5 val_5 9 val_9 -5 val_5 9 val_9 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 2 val_2 -8 val_8 4 val_4 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 8 val_8 -8 val_8 9 val_9 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 2 val_2 -9 val_9 4 val_4 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 8 val_8 -9 val_9 9 val_9 diff --git a/ql/src/test/results/clientpositive/llap/auto_join24.q.out b/ql/src/test/results/clientpositive/llap/auto_join24.q.out deleted file mode 100644 index 0f7f1e0112..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join24.q.out +++ /dev/null @@ -1,131 +0,0 @@ -PREHOOK: query: create table tst1_n2(key STRING, cnt INT) 
-PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tst1_n2 -POSTHOOK: query: create table tst1_n2(key STRING, cnt INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tst1_n2 -PREHOOK: query: INSERT OVERWRITE TABLE tst1_n2 -SELECT a.key, count(1) FROM src a group by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@tst1_n2 -POSTHOOK: query: INSERT OVERWRITE TABLE tst1_n2 -SELECT a.key, count(1) FROM src a group by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@tst1_n2 -POSTHOOK: Lineage: tst1_n2.cnt EXPRESSION [(src)a.null, ] -POSTHOOK: Lineage: tst1_n2.key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: explain -SELECT sum(a.cnt) FROM tst1_n2 a JOIN tst1_n2 b ON a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@tst1_n2 -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT sum(a.cnt) FROM tst1_n2 a JOIN tst1_n2 b ON a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tst1_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 309 Data size: 28119 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 309 Data size: 28119 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), cnt (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 309 Data size: 28119 Basic stats: COMPLETE Column stats: COMPLETE - Map Join 
Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 309 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT sum(a.cnt) FROM tst1_n2 a JOIN tst1_n2 b ON a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@tst1_n2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(a.cnt) FROM tst1_n2 a JOIN tst1_n2 b ON a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tst1_n2 -#### A masked pattern was here #### -500 diff --git a/ql/src/test/results/clientpositive/llap/auto_join26.q.out b/ql/src/test/results/clientpositive/llap/auto_join26.q.out deleted file mode 100644 index cbe9af7367..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join26.q.out +++ /dev/null @@ -1,208 +0,0 @@ -PREHOOK: query: CREATE TABLE dest_j1(key INT, cnt INT) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest_j1 -POSTHOOK: query: CREATE TABLE dest_j1(key INT, cnt INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest_j1 -SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Output: default@dest_j1 -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest_j1 -SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Output: default@dest_j1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 
(CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 0 Map 1 - Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.5897436 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce 
partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Select Operator - expressions: _col0 (type: int), _col1 (type: int) - outputColumnNames: key, cnt - Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') - minReductionHashAggr: 0.9375 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: 
COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, cnt - Column Types: int, int - Table: default.dest_j1 - -PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Output: default@dest_j1 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ] -POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: select * from dest_j1 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: query: select * from dest_j1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -128 3 -146 2 -150 1 -213 2 -224 2 -238 2 -255 2 -273 3 -278 2 -311 3 -369 3 -401 5 -406 4 -66 1 -98 2 diff --git a/ql/src/test/results/clientpositive/llap/auto_join27.q.out 
b/ql/src/test/results/clientpositive/llap/auto_join27.q.out deleted file mode 100644 index 3d755e51fc..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join27.q.out +++ /dev/null @@ -1,210 +0,0 @@ -PREHOOK: query: explain -SELECT count(1) -FROM -( -SELECT src.key, src.value from src -UNION ALL -SELECT DISTINCT src.key, src.value from src -) src_12 -JOIN -(SELECT src.key as k, src.value as v from src) src3 -ON src_12.key = src3.k AND src3.k < 200 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT count(1) -FROM -( -SELECT src.key, src.value from src -UNION ALL -SELECT DISTINCT src.key, src.value from src -) src_12 -JOIN -(SELECT src.key as k, src.value as v from src) src3 -ON src_12.key = src3.k AND src3.k < 200 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) - Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 200.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 200.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 6 - Statistics: Num rows: 249 
Data size: 1992 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 200.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 200.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 200.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 200.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output 
Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 6 - Statistics: Num rows: 249 Data size: 1992 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT count(1) -FROM -( -SELECT src.key, src.value from src -UNION ALL -SELECT DISTINCT src.key, src.value from src -) src_12 -JOIN -(SELECT src.key as k, src.value as v from src) src3 -ON src_12.key = src3.k AND src3.k < 200 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT count(1) -FROM -( -SELECT src.key, src.value from src -UNION ALL -SELECT DISTINCT src.key, src.value from src -) src_12 -JOIN -(SELECT src.key as k, src.value as v from src) src3 -ON src_12.key = src3.k AND src3.k < 200 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -548 diff --git a/ql/src/test/results/clientpositive/llap/auto_join28.q.out b/ql/src/test/results/clientpositive/llap/auto_join28.q.out deleted file mode 100644 index aebd305fb5..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join28.q.out +++ /dev/null @@ -1,515 +0,0 @@ -PREHOOK: query: explain -SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE 
DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 0 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 0 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - 
Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {_col2} - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 0 Map 1 - Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {(UDFToDouble(_col0) < 10.0D)} - 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - 
Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 1 Map 4 - Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: (UDFToDouble(key) > 10.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) > 10.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src3 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value 
(type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, 
src3.key, src3.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {(UDFToDouble(_col0) > 10.0D)} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE 
Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 1 Map 4 - Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src3 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 
762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) > 10.0D) (type: 
boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - filter predicates: - 0 {_col2} - 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - input vertices: - 1 Map 2 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - 
LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {_col2} - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 0 Map 1 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: 
- ListSink - diff --git a/ql/src/test/results/clientpositive/llap/auto_join3.q.out b/ql/src/test/results/clientpositive/llap/auto_join3.q.out deleted file mode 100644 index 6f0fcad352..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join3.q.out +++ /dev/null @@ -1,198 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n140(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n140 -POSTHOOK: query: CREATE TABLE dest1_n140(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n140 -PREHOOK: query: explain -FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) -INSERT OVERWRITE TABLE dest1_n140 SELECT src1.key, src3.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n140 -POSTHOOK: query: explain -FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) -INSERT OVERWRITE TABLE dest1_n140 SELECT src1.key, src3.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n140 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - 
outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n140 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is 
not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src3 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 
Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n140 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest1_n140 - -PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) -INSERT OVERWRITE TABLE dest1_n140 SELECT src1.key, src3.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n140 -POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) -INSERT OVERWRITE TABLE dest1_n140 SELECT src1.key, src3.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n140 -POSTHOOK: Lineage: dest1_n140.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n140.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1_n140.key,dest1_n140.value)) FROM dest1_n140 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n140 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1_n140.key,dest1_n140.value)) FROM dest1_n140 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n140 -#### A masked pattern was here #### -344360994461 diff --git a/ql/src/test/results/clientpositive/llap/auto_join31.q.out b/ql/src/test/results/clientpositive/llap/auto_join31.q.out deleted 
file mode 100644 index 28fa6d02b5..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join31.q.out +++ /dev/null @@ -1,179 +0,0 @@ -PREHOOK: query: explain -FROM -(SELECT src.* FROM src sort by key) x -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -FROM -(SELECT src.* FROM src sort by key) x -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) 
- 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col2,_col3) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key 
(type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM -(SELECT src.* FROM src sort by key) x -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT src.* FROM src sort by key) x -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -348019368476 diff --git a/ql/src/test/results/clientpositive/llap/auto_join32.q.out b/ql/src/test/results/clientpositive/llap/auto_join32.q.out deleted file mode 
100644 index e264f2fa0e..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join32.q.out +++ /dev/null @@ -1,695 +0,0 @@ -PREHOOK: query: create table studenttab10k (name string, age int, gpa double) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@studenttab10k -POSTHOOK: query: create table studenttab10k (name string, age int, gpa double) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@studenttab10k -PREHOOK: query: create table votertab10k (name string, age int, registration string, contributions float) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@votertab10k -POSTHOOK: query: create table votertab10k (name string, age int, registration string, contributions float) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@votertab10k -PREHOOK: query: explain select s.name, count(distinct registration) -from studenttab10k s join votertab10k v -on (s.name = v.name) -group by s.name -PREHOOK: type: QUERY -PREHOOK: Input: default@studenttab10k -PREHOOK: Input: default@votertab10k -#### A masked pattern was here #### -POSTHOOK: query: explain select s.name, count(distinct registration) -from studenttab10k s join votertab10k v -on (s.name = v.name) -group by s.name -POSTHOOK: type: QUERY -POSTHOOK: Input: default@studenttab10k -POSTHOOK: Input: default@votertab10k -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - filterExpr: name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: name is 
not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: v - filterExpr: name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: name (type: string), registration (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col2 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: 
KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select s.name, count(distinct registration) -from studenttab10k s join votertab10k v -on (s.name = v.name) -group by s.name -PREHOOK: type: QUERY -PREHOOK: Input: default@studenttab10k -PREHOOK: Input: default@votertab10k -#### A masked pattern was here #### -POSTHOOK: query: select s.name, count(distinct registration) -from studenttab10k s join votertab10k v -on (s.name = v.name) -group by s.name -POSTHOOK: type: QUERY -POSTHOOK: Input: default@studenttab10k -POSTHOOK: Input: default@votertab10k -#### A masked pattern was here #### -PREHOOK: query: create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@studenttab10k_smb -POSTHOOK: query: create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@studenttab10k_smb -PREHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by 
(name) into 2 buckets -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@votertab10k_smb -POSTHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@votertab10k_smb -PREHOOK: query: explain select s.name, count(distinct registration) -from studenttab10k_smb s join votertab10k_smb v -on (s.name = v.name) -group by s.name -PREHOOK: type: QUERY -PREHOOK: Input: default@studenttab10k_smb -PREHOOK: Input: default@votertab10k_smb -#### A masked pattern was here #### -POSTHOOK: query: explain select s.name, count(distinct registration) -from studenttab10k_smb s join votertab10k_smb v -on (s.name = v.name) -group by s.name -POSTHOOK: type: QUERY -POSTHOOK: Input: default@studenttab10k_smb -POSTHOOK: Input: default@votertab10k_smb -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - filterExpr: name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: v - filterExpr: name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: name (type: string), registration (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col2 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 202 Basic stats: 
COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select s.name, count(distinct registration) -from studenttab10k_smb s join votertab10k_smb v -on (s.name = v.name) -group by s.name -PREHOOK: type: QUERY -PREHOOK: Input: default@studenttab10k_smb -PREHOOK: Input: default@votertab10k_smb -#### A masked pattern was here #### -POSTHOOK: query: select s.name, count(distinct registration) -from studenttab10k_smb s join votertab10k_smb v -on (s.name = v.name) -group by s.name -POSTHOOK: type: QUERY -POSTHOOK: Input: default@studenttab10k_smb -POSTHOOK: Input: default@votertab10k_smb -#### A masked pattern was here #### -PREHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table studenttab10k_smb -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@studenttab10k_smb -POSTHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table studenttab10k_smb -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@studenttab10k_smb -PREHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table studenttab10k_smb -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@studenttab10k_smb -POSTHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table studenttab10k_smb -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@studenttab10k_smb -PREHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table votertab10k_smb -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@votertab10k_smb -POSTHOOK: query: load data local inpath 
'../../data/files/empty/000000_0' into table votertab10k_smb -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@votertab10k_smb -PREHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table votertab10k_smb -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@votertab10k_smb -POSTHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table votertab10k_smb -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@votertab10k_smb -PREHOOK: query: explain select s.name, count(distinct registration) -from studenttab10k_smb s join votertab10k_smb v -on (s.name = v.name) -group by s.name -PREHOOK: type: QUERY -PREHOOK: Input: default@studenttab10k_smb -PREHOOK: Input: default@votertab10k_smb -#### A masked pattern was here #### -POSTHOOK: query: explain select s.name, count(distinct registration) -from studenttab10k_smb s join votertab10k_smb v -on (s.name = v.name) -group by s.name -POSTHOOK: type: QUERY -POSTHOOK: Input: default@studenttab10k_smb -POSTHOOK: Input: default@votertab10k_smb -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - filterExpr: name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 
(type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: v - filterExpr: name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: name (type: string), registration (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col2 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select s.name, count(distinct registration) -from studenttab10k_smb s join votertab10k_smb v -on (s.name = v.name) -group by s.name -PREHOOK: type: QUERY -PREHOOK: Input: default@studenttab10k_smb -PREHOOK: Input: default@votertab10k_smb -#### A masked pattern was here #### -POSTHOOK: query: select s.name, count(distinct registration) -from studenttab10k_smb s join votertab10k_smb v -on (s.name = v.name) -group by s.name -POSTHOOK: type: QUERY -POSTHOOK: Input: default@studenttab10k_smb -POSTHOOK: Input: default@votertab10k_smb -#### A masked pattern was here #### -PREHOOK: query: create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@studenttab10k_part -POSTHOOK: query: create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@studenttab10k_part -PREHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@votertab10k_part -POSTHOOK: query: create table votertab10k_part (name string, age int, 
registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@votertab10k_part -PREHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table studenttab10k_part partition (p='foo') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@studenttab10k_part -POSTHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table studenttab10k_part partition (p='foo') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@studenttab10k_part -POSTHOOK: Output: default@studenttab10k_part@p=foo -PREHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table studenttab10k_part partition (p='foo') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@studenttab10k_part@p=foo -POSTHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table studenttab10k_part partition (p='foo') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@studenttab10k_part@p=foo -PREHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table votertab10k_part partition (p='foo') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@votertab10k_part -POSTHOOK: query: load data local inpath '../../data/files/empty/000000_0' into table votertab10k_part partition (p='foo') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@votertab10k_part -POSTHOOK: Output: default@votertab10k_part@p=foo -PREHOOK: query: load data local inpath '../../data/files/empty/000001_0' into table votertab10k_part partition (p='foo') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@votertab10k_part@p=foo -POSTHOOK: query: load data local inpath 
'../../data/files/empty/000001_0' into table votertab10k_part partition (p='foo') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@votertab10k_part@p=foo -PREHOOK: query: explain select s.name, count(distinct registration) -from studenttab10k_part s join votertab10k_part v -on (s.name = v.name) -where s.p = 'bar' -and v.p = 'bar' -group by s.name -PREHOOK: type: QUERY -PREHOOK: Input: default@studenttab10k_part -PREHOOK: Input: default@votertab10k_part -#### A masked pattern was here #### -POSTHOOK: query: explain select s.name, count(distinct registration) -from studenttab10k_part s join votertab10k_part v -on (s.name = v.name) -where s.p = 'bar' -and v.p = 'bar' -group by s.name -POSTHOOK: type: QUERY -POSTHOOK: Input: default@studenttab10k_part -POSTHOOK: Input: default@votertab10k_part -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: s - filterExpr: ((p = 'bar') and name is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((p = 'bar') and name is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: 
string), _col2 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: unknown - Map 3 - Map Operator Tree: - TableScan - alias: v - filterExpr: ((p = 'bar') and name is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((p = 'bar') and name is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: name (type: string), registration (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: unknown - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data 
size: 92 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select s.name, count(distinct registration) -from studenttab10k_part s join votertab10k_part v -on (s.name = v.name) -where s.p = 'bar' -and v.p = 'bar' -group by s.name -PREHOOK: type: QUERY -PREHOOK: Input: default@studenttab10k_part -PREHOOK: Input: default@votertab10k_part -#### A masked pattern was here #### -POSTHOOK: query: select s.name, count(distinct registration) -from studenttab10k_part s join votertab10k_part v -on (s.name = v.name) -where s.p = 'bar' -and v.p = 'bar' -group by s.name -POSTHOOK: type: QUERY -POSTHOOK: Input: default@studenttab10k_part -POSTHOOK: Input: default@votertab10k_part -#### A masked pattern was here #### -PREHOOK: query: drop table studenttab10k -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@studenttab10k -PREHOOK: Output: default@studenttab10k -POSTHOOK: query: drop table studenttab10k -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@studenttab10k -POSTHOOK: Output: default@studenttab10k -PREHOOK: query: drop table votertab10k -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@votertab10k -PREHOOK: Output: default@votertab10k -POSTHOOK: query: drop table votertab10k -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@votertab10k -POSTHOOK: Output: default@votertab10k -PREHOOK: query: drop table studenttab10k_smb -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@studenttab10k_smb -PREHOOK: Output: default@studenttab10k_smb -POSTHOOK: query: drop table studenttab10k_smb -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@studenttab10k_smb -POSTHOOK: Output: default@studenttab10k_smb -PREHOOK: query: drop table votertab10k_smb -PREHOOK: type: DROPTABLE -PREHOOK: 
Input: default@votertab10k_smb -PREHOOK: Output: default@votertab10k_smb -POSTHOOK: query: drop table votertab10k_smb -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@votertab10k_smb -POSTHOOK: Output: default@votertab10k_smb -PREHOOK: query: drop table studenttab10k_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@studenttab10k_part -PREHOOK: Output: default@studenttab10k_part -POSTHOOK: query: drop table studenttab10k_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@studenttab10k_part -POSTHOOK: Output: default@studenttab10k_part -PREHOOK: query: drop table votertab10k_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@votertab10k_part -PREHOOK: Output: default@votertab10k_part -POSTHOOK: query: drop table votertab10k_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@votertab10k_part -POSTHOOK: Output: default@votertab10k_part diff --git a/ql/src/test/results/clientpositive/llap/auto_join33.q.out b/ql/src/test/results/clientpositive/llap/auto_join33.q.out deleted file mode 100644 index 7be5929513..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join33.q.out +++ /dev/null @@ -1,110 +0,0 @@ -PREHOOK: query: explain -SELECT * FROM - (SELECT * FROM src WHERE key+1 < 10) a - JOIN - (SELECT * FROM src WHERE key+2 < 10) b - ON a.key+1=b.key+2 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM - (SELECT * FROM src WHERE key+1 < 10) a - JOIN - (SELECT * FROM src WHERE key+2 < 10) b - ON a.key+1=b.key+2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (((UDFToDouble(key) + 1.0D) < 10.0D) and 
UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((UDFToDouble(key) + 1.0D) < 10.0D) and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col0) + 1.0D) (type: double) - 1 (UDFToDouble(_col0) + 2.0D) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src - filterExpr: (((UDFToDouble(key) + 2.0D) < 10.0D) and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((UDFToDouble(key) + 2.0D) < 10.0D) and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (UDFToDouble(_col0) + 2.0D) (type: 
double) - null sort order: z - sort order: + - Map-reduce partition columns: (UDFToDouble(_col0) + 2.0D) (type: double) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT * FROM - (SELECT * FROM src WHERE key+1 < 10) a - JOIN - (SELECT * FROM src WHERE key+2 < 10) b - ON a.key+1=b.key+2 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM - (SELECT * FROM src WHERE key+1 < 10) a - JOIN - (SELECT * FROM src WHERE key+2 < 10) b - ON a.key+1=b.key+2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 4 val_4 diff --git a/ql/src/test/results/clientpositive/llap/auto_join4.q.out b/ql/src/test/results/clientpositive/llap/auto_join4.q.out deleted file mode 100644 index 341c892a7b..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join4.q.out +++ /dev/null @@ -1,213 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n115(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n115 -POSTHOOK: query: CREATE TABLE dest1_n115(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n115 -PREHOOK: query: explain -FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - LEFT OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n115 SELECT c.c1, c.c2, 
c.c3, c.c4 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n115 -POSTHOOK: query: explain -FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - LEFT OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n115 SELECT c.c1, c.c2, c.c3, c.c4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n115 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: 
int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n115 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) - outputColumnNames: c1, c2, c3, c4 - Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') - minReductionHashAggr: 0.9818182 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: 
COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n115 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1, c2, c3, c4 - Column Types: int, string, int, string - Table: default.dest1_n115 - -PREHOOK: query: FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - LEFT OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n115 SELECT c.c1, c.c2, c.c3, c.c4 -PREHOOK: type: QUERY -PREHOOK: Input: 
default@src -PREHOOK: Output: default@dest1_n115 -POSTHOOK: query: FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - LEFT OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n115 SELECT c.c1, c.c2, c.c3, c.c4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n115 -POSTHOOK: Lineage: dest1_n115.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n115.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n115.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n115.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1_n115.c1,dest1_n115.c2,dest1_n115.c3,dest1_n115.c4)) FROM dest1_n115 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n115 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1_n115.c1,dest1_n115.c2,dest1_n115.c3,dest1_n115.c4)) FROM dest1_n115 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n115 -#### A masked pattern was here #### -5079148035 diff --git a/ql/src/test/results/clientpositive/llap/auto_join5.q.out b/ql/src/test/results/clientpositive/llap/auto_join5.q.out deleted file mode 100644 index 6419b80bb9..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join5.q.out +++ /dev/null @@ -1,213 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n64(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n64 -POSTHOOK: query: CREATE TABLE dest1_n64(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE -POSTHOOK: type: 
CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n64 -PREHOOK: query: explain -FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - RIGHT OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n64 SELECT c.c1, c.c2, c.c3, c.c4 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n64 -POSTHOOK: query: explain -FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - RIGHT OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n64 SELECT c.c1, c.c2, c.c3, c.c4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n64 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n64 - Select Operator - 
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) - outputColumnNames: c1, c2, c3, c4 - Statistics: Num rows: 55 Data size: 10450 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') - minReductionHashAggr: 0.9818182 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n64 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1, c2, c3, c4 - Column Types: int, string, 
int, string - Table: default.dest1_n64 - -PREHOOK: query: FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - RIGHT OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n64 SELECT c.c1, c.c2, c.c3, c.c4 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n64 -POSTHOOK: query: FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - RIGHT OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n64 SELECT c.c1, c.c2, c.c3, c.c4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n64 -POSTHOOK: Lineage: dest1_n64.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n64.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n64.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n64.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1_n64.c1,dest1_n64.c2,dest1_n64.c3,dest1_n64.c4)) FROM dest1_n64 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n64 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1_n64.c1,dest1_n64.c2,dest1_n64.c3,dest1_n64.c4)) FROM dest1_n64 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n64 -#### A masked pattern was here #### -9766083196 diff --git a/ql/src/test/results/clientpositive/llap/auto_join6.q.out b/ql/src/test/results/clientpositive/llap/auto_join6.q.out 
deleted file mode 100644 index aac9321390..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join6.q.out +++ /dev/null @@ -1,221 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n9(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n9 -POSTHOOK: query: CREATE TABLE dest1_n9(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n9 -PREHOOK: query: explain -FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - FULL OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n9 SELECT c.c1, c.c2, c.c3, c.c4 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n9 -POSTHOOK: query: explain -FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - FULL OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n9 SELECT c.c1, c.c2, c.c3, c.c4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n9 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: 
((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 
_col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 110 Data size: 39160 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 110 Data size: 20900 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 110 Data size: 20900 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n9 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) - outputColumnNames: c1, c2, c3, c4 - Statistics: Num rows: 110 Data size: 20900 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - 
File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n9 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1, c2, c3, c4 - Column Types: int, string, int, string - Table: default.dest1_n9 - -PREHOOK: query: FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - FULL OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n9 SELECT c.c1, c.c2, c.c3, c.c4 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n9 -POSTHOOK: query: FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - FULL OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n9 SELECT c.c1, c.c2, c.c3, c.c4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n9 -POSTHOOK: Lineage: dest1_n9.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n9.c2 SIMPLE [(src)src1.FieldSchema(name:value, 
type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n9.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n9.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1_n9.c1,dest1_n9.c2,dest1_n9.c3,dest1_n9.c4)) FROM dest1_n9 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n9 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1_n9.c1,dest1_n9.c2,dest1_n9.c3,dest1_n9.c4)) FROM dest1_n9 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n9 -#### A masked pattern was here #### -2607643291 diff --git a/ql/src/test/results/clientpositive/llap/auto_join7.q.out b/ql/src/test/results/clientpositive/llap/auto_join7.q.out deleted file mode 100644 index cd3b8c4c9d..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join7.q.out +++ /dev/null @@ -1,275 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n147(c1 INT, c2 STRING, c3 INT, c4 STRING, c5 INT, c6 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n147 -POSTHOOK: query: CREATE TABLE dest1_n147(c1 INT, c2 STRING, c3 INT, c4 STRING, c5 INT, c6 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n147 -PREHOOK: query: explain -FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - FULL OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - LEFT OUTER JOIN - ( - FROM src src3 SELECT src3.key AS c5, src3.value AS c6 WHERE src3.key > 20 and src3.key < 25 - ) c - ON (a.c1 = c.c5) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 -) c -INSERT OVERWRITE TABLE dest1_n147 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 -PREHOOK: type: QUERY -PREHOOK: Input: default@src 
-PREHOOK: Output: default@dest1_n147 -POSTHOOK: query: explain -FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - FULL OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - LEFT OUTER JOIN - ( - FROM src src3 SELECT src3.key AS c5, src3.value AS c6 WHERE src3.key > 20 and src3.key < 25 - ) c - ON (a.c1 = c.c5) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 -) c -INSERT OVERWRITE TABLE dest1_n147 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n147 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: 
COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: src3 - filterExpr: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution 
mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 110 Data size: 39160 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 1 Map 5 - Statistics: Num rows: 110 Data size: 58740 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col4) (type: int), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n147 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) - outputColumnNames: c1, c2, c3, c4, c5, c6 - Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2592 
Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n147 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1, c2, c3, c4, c5, c6 - Column Types: int, string, int, string, int, string - Table: default.dest1_n147 - -PREHOOK: query: FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - FULL OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - LEFT OUTER JOIN - ( - FROM src src3 SELECT src3.key AS c5, src3.value AS c6 WHERE 
src3.key > 20 and src3.key < 25 - ) c - ON (a.c1 = c.c5) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 -) c -INSERT OVERWRITE TABLE dest1_n147 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n147 -POSTHOOK: query: FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - FULL OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - LEFT OUTER JOIN - ( - FROM src src3 SELECT src3.key AS c5, src3.value AS c6 WHERE src3.key > 20 and src3.key < 25 - ) c - ON (a.c1 = c.c5) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 -) c -INSERT OVERWRITE TABLE dest1_n147 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n147 -POSTHOOK: Lineage: dest1_n147.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n147.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n147.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n147.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n147.c5 EXPRESSION [(src)src3.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n147.c6 SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1_n147.c1,dest1_n147.c2,dest1_n147.c3,dest1_n147.c4,dest1_n147.c5,dest1_n147.c6)) FROM dest1_n147 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n147 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1_n147.c1,dest1_n147.c2,dest1_n147.c3,dest1_n147.c4,dest1_n147.c5,dest1_n147.c6)) FROM 
dest1_n147 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n147 -#### A masked pattern was here #### --2315698213 diff --git a/ql/src/test/results/clientpositive/llap/auto_join8.q.out b/ql/src/test/results/clientpositive/llap/auto_join8.q.out deleted file mode 100644 index 3f3dcd252c..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join8.q.out +++ /dev/null @@ -1,216 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n3(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n3 -POSTHOOK: query: CREATE TABLE dest1_n3(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n3 -PREHOOK: query: explain -FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - LEFT OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n3 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n3 -POSTHOOK: query: explain -FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - LEFT OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n3 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n3 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 
depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 10.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col2 is null (type: boolean) - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), null (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n3 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: 
int), _col3 (type: string) - outputColumnNames: c1, c2, c3, c4 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, 
_col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n3 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1, c2, c3, c4 - Column Types: int, string, int, string - Table: default.dest1_n3 - -PREHOOK: query: FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - LEFT OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n3 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n3 -POSTHOOK: query: FROM ( - FROM - ( - FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 - ) a - LEFT OUTER JOIN - ( - FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 - ) b - ON (a.c1 = b.c3) - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 -) c -INSERT OVERWRITE TABLE dest1_n3 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n3 -POSTHOOK: 
Lineage: dest1_n3.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n3.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n3.c3 EXPRESSION [] -POSTHOOK: Lineage: dest1_n3.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1_n3.c1,dest1_n3.c2,dest1_n3.c3,dest1_n3.c4)) FROM dest1_n3 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n3 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1_n3.c1,dest1_n3.c2,dest1_n3.c3,dest1_n3.c4)) FROM dest1_n3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n3 -#### A masked pattern was here #### --7158439905 diff --git a/ql/src/test/results/clientpositive/llap/auto_join9.q.out b/ql/src/test/results/clientpositive/llap/auto_join9.q.out deleted file mode 100644 index 06c25ec17d..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join9.q.out +++ /dev/null @@ -1,174 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n142(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n142 -POSTHOOK: query: CREATE TABLE dest1_n142(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n142 -PREHOOK: query: explain -FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n142 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@dest1_n142 -POSTHOOK: query: explain -FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n142 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' -POSTHOOK: type: QUERY -POSTHOOK: 
Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@dest1_n142 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 
outputColumnNames: _col0, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n142 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 791 Data size: 75145 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n142 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest1_n142 - -PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n142 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@dest1_n142 -POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n142 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@dest1_n142 -POSTHOOK: Lineage: dest1_n142.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n142.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1_n142.key,dest1_n142.value)) FROM dest1_n142 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n142 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1_n142.key,dest1_n142.value)) FROM dest1_n142 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n142 -#### A masked pattern was here #### -101861029915 diff --git a/ql/src/test/results/clientpositive/llap/auto_join_reordering_values.q.out 
b/ql/src/test/results/clientpositive/llap/auto_join_reordering_values.q.out deleted file mode 100644 index abd6b8b6de..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join_reordering_values.q.out +++ /dev/null @@ -1,652 +0,0 @@ -PREHOOK: query: create table testsrc ( `key` int,`val` string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@testsrc -POSTHOOK: query: create table testsrc ( `key` int,`val` string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@testsrc -PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table testsrc -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@testsrc -POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table testsrc -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@testsrc -PREHOOK: query: drop table if exists orderpayment_small -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists orderpayment_small -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table orderpayment_small (`dealid` int,`date` string,`time` string, `cityid` int, `userid` int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@orderpayment_small -POSTHOOK: query: create table orderpayment_small (`dealid` int,`date` string,`time` string, `cityid` int, `userid` int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@orderpayment_small -PREHOOK: query: insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55 ,5372613 from testsrc tablesample (1 rows) -PREHOOK: type: QUERY -PREHOOK: Input: default@testsrc -PREHOOK: Output: default@orderpayment_small -POSTHOOK: query: insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55 ,5372613 from testsrc tablesample (1 rows) -POSTHOOK: type: QUERY -POSTHOOK: 
Input: default@testsrc -POSTHOOK: Output: default@orderpayment_small -POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE [] -POSTHOOK: Lineage: orderpayment_small.date SIMPLE [] -POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE [] -POSTHOOK: Lineage: orderpayment_small.time SIMPLE [] -POSTHOOK: Lineage: orderpayment_small.userid SIMPLE [] -PREHOOK: query: drop table if exists user_small -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists user_small -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table user_small( userid int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@user_small -POSTHOOK: query: create table user_small( userid int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@user_small -PREHOOK: query: insert overwrite table user_small select key from testsrc tablesample (100 rows) -PREHOOK: type: QUERY -PREHOOK: Input: default@testsrc -PREHOOK: Output: default@user_small -POSTHOOK: query: insert overwrite table user_small select key from testsrc tablesample (100 rows) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@testsrc -POSTHOOK: Output: default@user_small -POSTHOOK: Lineage: user_small.userid SIMPLE [(testsrc)testsrc.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: explain extended SELECT - `dim_pay_date`.`date` - , `deal`.`dealid` -FROM `orderpayment_small` `orderpayment` -JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date` -JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid` -JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid` -JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` -limit 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@orderpayment_small -PREHOOK: Input: default@user_small -#### A masked pattern was here #### -POSTHOOK: query: explain extended SELECT - `dim_pay_date`.`date` - , `deal`.`dealid` 
-FROM `orderpayment_small` `orderpayment` -JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date` -JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid` -JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid` -JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` -limit 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orderpayment_small -POSTHOOK: Input: default@user_small -#### A masked pattern was here #### -OPTIMIZED SQL: SELECT `t4`.`date`, `t6`.`dealid` -FROM (SELECT `userid` -FROM `default`.`user_small` -WHERE `userid` IS NOT NULL) AS `t0` -INNER JOIN ((SELECT `dealid`, `date`, `cityid`, `userid` -FROM `default`.`orderpayment_small` -WHERE `date` IS NOT NULL AND `dealid` IS NOT NULL AND `cityid` IS NOT NULL AND `userid` IS NOT NULL) AS `t2` -INNER JOIN (SELECT `date` -FROM `default`.`orderpayment_small` -WHERE `date` IS NOT NULL) AS `t4` ON `t2`.`date` = `t4`.`date` -INNER JOIN (SELECT `dealid` -FROM `default`.`orderpayment_small` -WHERE `dealid` IS NOT NULL) AS `t6` ON `t2`.`dealid` = `t6`.`dealid` -INNER JOIN (SELECT `cityid` -FROM `default`.`orderpayment_small` -WHERE `cityid` IS NOT NULL) AS `t8` ON `t2`.`cityid` = `t8`.`cityid`) ON `t0`.`userid` = `t2`.`userid` -LIMIT 5 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 8 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Map 9 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: orderpayment - filterExpr: (date is not null and dealid is not null and cityid is not null and userid is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 106 Basic 
stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (date is not null and dealid is not null and cityid is not null and userid is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: dealid (type: int), date (type: string), cityid (type: int), userid (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: orderpayment_small - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns dealid,date,time,cityid,userid - columns.comments - columns.types int:string:string:int:int -#### A masked pattern was here #### - name default.orderpayment_small - numFiles 1 - numRows 1 - rawDataSize 36 - serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 37 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns dealid,date,time,cityid,userid - columns.comments - columns.types int:string:string:int:int -#### A masked pattern was here #### - name default.orderpayment_small - numFiles 1 - numRows 1 - rawDataSize 36 - serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 37 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orderpayment_small - name: default.orderpayment_small - Truncated Path -> Alias: - /orderpayment_small [orderpayment] - Map 6 - Map Operator Tree: - TableScan - alias: dim_pay_date - filterExpr: date is not null (type: boolean) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: date is not null (type: boolean) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: date (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern 
was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: orderpayment_small - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns dealid,date,time,cityid,userid - columns.comments - columns.types int:string:string:int:int -#### A masked pattern was here #### - name default.orderpayment_small - numFiles 1 - numRows 1 - rawDataSize 36 - serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 37 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns dealid,date,time,cityid,userid - columns.comments - columns.types int:string:string:int:int -#### A masked pattern was here #### - name default.orderpayment_small - numFiles 1 - numRows 1 - rawDataSize 36 - serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 37 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orderpayment_small - name: default.orderpayment_small - Truncated Path -> Alias: - 
/orderpayment_small [dim_pay_date] - Map 7 - Map Operator Tree: - TableScan - alias: deal - filterExpr: dealid is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: dealid is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: dealid (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: orderpayment_small - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns dealid,date,time,cityid,userid - columns.comments - columns.types int:string:string:int:int -#### A masked pattern was here #### - name default.orderpayment_small - numFiles 1 - numRows 1 - rawDataSize 36 - serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 37 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns dealid,date,time,cityid,userid - columns.comments - columns.types int:string:string:int:int -#### A masked pattern was here #### - name default.orderpayment_small - numFiles 1 - numRows 1 - rawDataSize 36 - serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 37 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orderpayment_small - name: default.orderpayment_small - Truncated Path -> Alias: - /orderpayment_small [deal] - Map 8 - Map Operator Tree: - TableScan - alias: order_city - filterExpr: cityid is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: cityid is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cityid (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - 
base file name: orderpayment_small - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns dealid,date,time,cityid,userid - columns.comments - columns.types int:string:string:int:int -#### A masked pattern was here #### - name default.orderpayment_small - numFiles 1 - numRows 1 - rawDataSize 36 - serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 37 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns dealid,date,time,cityid,userid - columns.comments - columns.types int:string:string:int:int -#### A masked pattern was here #### - name default.orderpayment_small - numFiles 1 - numRows 1 - rawDataSize 36 - serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 37 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orderpayment_small - name: default.orderpayment_small - Truncated Path -> Alias: - /orderpayment_small [order_city] - Map 9 - Map Operator Tree: - TableScan - alias: user - 
filterExpr: userid is not null (type: boolean) - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: userid is not null (type: boolean) - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: userid (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: user_small - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns userid - columns.comments - columns.types int -#### A masked pattern was here #### - name default.user_small - numFiles 1 - numRows 100 - rawDataSize 288 - serialization.ddl struct user_small { i32 userid} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 388 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - 
columns userid - columns.comments - columns.types int -#### A masked pattern was here #### - name default.user_small - numFiles 1 - numRows 100 - rawDataSize 288 - serialization.ddl struct user_small { i32 userid} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 388 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.user_small - name: default.user_small - Truncated Path -> Alias: - /user_small [user] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3, _col4 - Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: string) - auto parallelism: true - Reducer 3 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5 - Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col3 (type: int), _col4 (type: string), _col5 (type: int) - auto parallelism: true - Reducer 4 - Execution mode: llap - Needs Tagging: false - 
Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5 - Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col4 (type: string), _col5 (type: int) - auto parallelism: true - Reducer 5 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5 - Position of Big Table: 1 - Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col4 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 
- GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - `dim_pay_date`.`date` - , `deal`.`dealid` -FROM `orderpayment_small` `orderpayment` -JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date` -JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid` -JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid` -JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` -limit 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@orderpayment_small -PREHOOK: Input: default@user_small -#### A masked pattern was here #### -POSTHOOK: query: SELECT - `dim_pay_date`.`date` - , `deal`.`dealid` -FROM `orderpayment_small` `orderpayment` -JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date` -JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid` -JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid` -JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` -limit 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orderpayment_small -POSTHOOK: Input: default@user_small -#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/auto_join_stats.q.out b/ql/src/test/results/clientpositive/llap/auto_join_stats.q.out deleted file mode 100644 index 00c371f461..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join_stats.q.out +++ /dev/null @@ -1,363 +0,0 @@ -PREHOOK: query: create table smalltable_n0(key string, value string) stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@smalltable_n0 -POSTHOOK: query: create table smalltable_n0(key string, value string) stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@smalltable_n0 -PREHOOK: 
query: load data local inpath '../../data/files/T1.txt' into table smalltable_n0 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@smalltable_n0 -POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable_n0 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@smalltable_n0 -PREHOOK: query: analyze table smalltable_n0 compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@smalltable_n0 -PREHOOK: Output: default@smalltable_n0 -POSTHOOK: query: analyze table smalltable_n0 compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smalltable_n0 -POSTHOOK: Output: default@smalltable_n0 -PREHOOK: query: explain select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@smalltable_n0 -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smalltable_n0 -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 
43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: smalltable_n0 - filterExpr: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), 
UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: double) - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (_col1 + _col3) (type: double) - 1 _col1 (type: double) - outputColumnNames: _col0, _col2, _col4 - input vertices: - 1 Map 4 - Statistics: Num rows: 870 Data size: 165319 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 870 Data size: 165319 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 870 Data size: 165319 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@smalltable_n0 -PREHOOK: Input: default@src -#### 
A masked pattern was here #### -POSTHOOK: query: select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smalltable_n0 -POSTHOOK: Input: default@src -#### A masked pattern was here #### -4 4 8 -4 4 8 -PREHOOK: query: create table smalltable2_n0(key string, value string) stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@smalltable2_n0 -POSTHOOK: query: create table smalltable2_n0(key string, value string) stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@smalltable2_n0 -PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2_n0 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@smalltable2_n0 -POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2_n0 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@smalltable2_n0 -PREHOOK: query: analyze table smalltable_n0 compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@smalltable_n0 -PREHOOK: Output: default@smalltable_n0 -POSTHOOK: query: analyze table smalltable_n0 compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smalltable_n0 -POSTHOOK: Output: default@smalltable_n0 -PREHOOK: query: explain select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) JOIN smalltable2_n0 ON (src1.key + src2.key = smalltable2_n0.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@smalltable2_n0 -PREHOOK: Input: default@smalltable_n0 -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = 
src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) JOIN smalltable2_n0 ON (src1.key + src2.key = smalltable2_n0.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smalltable2_n0 -POSTHOOK: Input: default@smalltable_n0 -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column 
stats: COMPLETE - Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: smalltable2_n0 - filterExpr: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: smalltable_n0 - filterExpr: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key 
expressions: _col1 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: double) - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (_col1 + _col3) (type: double) - 1 _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 870 Data size: 165319 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (_col1 + _col3) (type: double) - 1 _col1 (type: double) - outputColumnNames: _col0, _col2, _col5 - input vertices: - 1 Map 5 - Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = 
smalltable_n0.key) JOIN smalltable2_n0 ON (src1.key + src2.key = smalltable2_n0.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@smalltable2_n0 -PREHOOK: Input: default@smalltable_n0 -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select src1.key, src2.key, smalltable_n0.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable_n0 ON (src1.key + src2.key = smalltable_n0.key) JOIN smalltable2_n0 ON (src1.key + src2.key = smalltable2_n0.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smalltable2_n0 -POSTHOOK: Input: default@smalltable_n0 -POSTHOOK: Input: default@src -#### A masked pattern was here #### -4 4 8 -4 4 8 -4 4 8 -4 4 8 diff --git a/ql/src/test/results/clientpositive/llap/auto_join_stats2.q.out b/ql/src/test/results/clientpositive/llap/auto_join_stats2.q.out deleted file mode 100644 index b946781e12..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join_stats2.q.out +++ /dev/null @@ -1,340 +0,0 @@ -PREHOOK: query: create table smalltable(key string, value string) stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@smalltable -POSTHOOK: query: create table smalltable(key string, value string) stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@smalltable -PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@smalltable -POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@smalltable -Warning: Map Join MAPJOIN[33][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@smalltable -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smalltable -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 1 Map 3 - residual 
filter predicates: {((_col5 + _col1) = _col3)} - Statistics: Num rows: 275 Data size: 77000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 77000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 77000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: smalltable - filterExpr: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - 
expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@smalltable -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smalltable -POSTHOOK: Input: default@src -#### A masked pattern was here #### -4 4 8 -4 4 8 -PREHOOK: query: create table smalltable2(key string, value string) stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@smalltable2 -POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@smalltable2 -PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@smalltable2 -POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 -POSTHOOK: type: LOAD -#### A masked pattern was 
here #### -POSTHOOK: Output: default@smalltable2 -PREHOOK: query: analyze table smalltable compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@smalltable -PREHOOK: Output: default@smalltable -POSTHOOK: query: analyze table smalltable compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smalltable -POSTHOOK: Output: default@smalltable -PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@smalltable -PREHOOK: Input: default@smalltable2 -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smalltable -POSTHOOK: Input: default@smalltable2 -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, 
_col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (_col1 + _col3) (type: double) - 1 _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 870 Data size: 165319 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (_col1 + _col3) (type: double) - 1 _col1 (type: double) - outputColumnNames: _col0, _col2, _col5 - input vertices: - 1 Map 4 - Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator 
- expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: smalltable2 - filterExpr: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: smalltable - filterExpr: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: double) - null 
sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: double) - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@smalltable -PREHOOK: Input: default@smalltable2 -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smalltable -POSTHOOK: Input: default@smalltable2 -POSTHOOK: Input: default@src -#### A masked pattern was here #### -4 4 8 -4 4 8 -4 4 8 -4 4 8 diff --git a/ql/src/test/results/clientpositive/llap/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/llap/auto_join_without_localtask.q.out deleted file mode 100644 index da8f7ce53c..0000000000 --- a/ql/src/test/results/clientpositive/llap/auto_join_without_localtask.q.out +++ /dev/null @@ -1,615 +0,0 @@ -PREHOOK: query: explain -select a.* from src a join src b on a.key=b.key order by key, value limit 40 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select a.* from src a join src b on a.key=b.key order by key, value limit 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez 
-#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 
(type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col1 (type: string) - null sort order: zz - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - top n: 40 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 40 - Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 40 - Processor Tree: - ListSink - -PREHOOK: query: select a.* from src a join src b on a.key=b.key order by key, value limit 40 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select a.* from src a join src b on a.key=b.key order by key, value limit 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -RUN: Stage-1:MAPRED -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -100 val_100 -100 val_100 -100 val_100 -100 val_100 -103 val_103 -103 
val_103 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -105 val_105 -11 val_11 -111 val_111 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -114 val_114 -116 val_116 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -PREHOOK: query: explain -select a.* from src a join src b on a.key=b.key join src c on a.value=c.value order by a.key, a.value limit 40 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select a.* from src a join src b on a.key=b.key join src c on a.value=c.value order by a.key, a.value limit 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: 
_col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: c - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - 
key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1288 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col1 (type: string) - null sort order: zz - Statistics: Num rows: 1288 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE - top n: 40 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 1288 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1288 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 40 - Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 40 - Processor Tree: - ListSink - -PREHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a.value=c.value order by a.key, a.value limit 40 -PREHOOK: type: QUERY 
-PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a.value=c.value order by a.key, a.value limit 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -RUN: Stage-1:MAPRED -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -100 val_100 -100 val_100 -100 val_100 -100 val_100 -100 val_100 -100 val_100 -100 val_100 -100 val_100 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -PREHOOK: query: explain -select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select 
Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: b - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: c - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 
(type: string) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col1 (type: string) - null sort order: zz - Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE - top n: 40 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 40 - Statistics: Num rows: 40 Data size: 7120 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 40 Data size: 7120 Basic stats: 
COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 40 - Processor Tree: - ListSink - -PREHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -RUN: Stage-1:MAPRED -103 val_103 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -105 val_105 -111 val_111 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -114 val_114 -116 val_116 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -119 val_119 -PREHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -RUN: Stage-1:MAPRED -103 val_103 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -104 val_104 -104 val_104 
-104 val_104 -104 val_104 -105 val_105 -111 val_111 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -113 val_113 -114 val_114 -116 val_116 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -119 val_119 diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_4.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_4.q.out deleted file mode 100644 index 77f994fede..0000000000 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_4.q.out +++ /dev/null @@ -1,551 +0,0 @@ -PREHOOK: query: CREATE TABLE test_table1_n16 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_table1_n16 -POSTHOOK: query: CREATE TABLE test_table1_n16 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test_table1_n16 -PREHOOK: query: CREATE TABLE test_table2_n15 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_table2_n15 -POSTHOOK: query: CREATE TABLE test_table2_n15 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test_table2_n15 -PREHOOK: query: CREATE TABLE test_table3_n8 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key2) SORTED BY (key2) INTO 2 BUCKETS -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_table3_n8 -POSTHOOK: query: CREATE TABLE test_table3_n8 
(key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key2) SORTED BY (key2) INTO 2 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test_table3_n8 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1_n16 PARTITION (ds = '1') SELECT * where key < 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table1_n16@ds=1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1_n16 PARTITION (ds = '1') SELECT * where key < 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table1_n16@ds=1 -POSTHOOK: Lineage: test_table1_n16 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table1_n16 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table2_n15 PARTITION (ds = '1') SELECT * where key < 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table2_n15@ds=1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table2_n15 PARTITION (ds = '1') SELECT * where key < 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table2_n15@ds=1 -POSTHOOK: Lineage: test_table2_n15 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2_n15 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') -SELECT a.key, a.key, concat(a.value, b.value) -FROM test_table1_n16 a JOIN test_table2_n15 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1_n16 -PREHOOK: Input: default@test_table1_n16@ds=1 -PREHOOK: Input: default@test_table2_n15 -PREHOOK: Input: 
default@test_table2_n15@ds=1 -PREHOOK: Output: default@test_table3_n8@ds=1 -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') -SELECT a.key, a.key, concat(a.value, b.value) -FROM test_table1_n16 a JOIN test_table2_n15 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1_n16 -POSTHOOK: Input: default@test_table1_n16@ds=1 -POSTHOOK: Input: default@test_table2_n15 -POSTHOOK: Input: default@test_table2_n15@ds=1 -POSTHOOK: Output: default@test_table3_n8@ds=1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Filter 
Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 2562 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string) - Execution mode: llap - LLAP IO: no inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3_n8 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) - outputColumnNames: key, key2, value, ds - Statistics: Num rows: 14 Data 
size: 3878 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string) - minReductionHashAggr: 0.9285714 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 1 - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - 
name: default.test_table3_n8 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, key2, value - Column Types: int, int, string - Table: default.test_table3_n8 - -PREHOOK: query: INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') -SELECT a.key, a.key, concat(a.value, b.value) -FROM test_table1_n16 a JOIN test_table2_n15 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1_n16 -PREHOOK: Input: default@test_table1_n16@ds=1 -PREHOOK: Input: default@test_table2_n15 -PREHOOK: Input: default@test_table2_n15@ds=1 -PREHOOK: Output: default@test_table3_n8@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') -SELECT a.key, a.key, concat(a.value, b.value) -FROM test_table1_n16 a JOIN test_table2_n15 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1_n16 -POSTHOOK: Input: default@test_table1_n16@ds=1 -POSTHOOK: Input: default@test_table2_n15 -POSTHOOK: Input: default@test_table2_n15@ds=1 -POSTHOOK: Output: default@test_table3_n8@ds=1 -POSTHOOK: Lineage: test_table3_n8 PARTITION(ds=1).key SIMPLE [(test_table1_n16)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3_n8 PARTITION(ds=1).key2 SIMPLE [(test_table1_n16)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3_n8 PARTITION(ds=1).value EXPRESSION [(test_table1_n16)a.FieldSchema(name:value, type:string, comment:null), (test_table2_n15)b.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: select * from test_table3_n8 tablesample (bucket 1 out of 2) s where ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table3_n8 -PREHOOK: Input: default@test_table3_n8@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select * from test_table3_n8 tablesample (bucket 1 out of 2) s where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: 
default@test_table3_n8 -POSTHOOK: Input: default@test_table3_n8@ds=1 -#### A masked pattern was here #### -2 2 val_2val_2 1 -PREHOOK: query: select * from test_table3_n8 tablesample (bucket 2 out of 2) s where ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table3_n8 -PREHOOK: Input: default@test_table3_n8@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select * from test_table3_n8 tablesample (bucket 2 out of 2) s where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table3_n8 -POSTHOOK: Input: default@test_table3_n8@ds=1 -#### A masked pattern was here #### -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -4 4 val_4val_4 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -8 8 val_8val_8 1 -9 9 val_9val_9 1 -PREHOOK: query: DROP TABLE test_table3_n8 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@test_table3_n8 -PREHOOK: Output: default@test_table3_n8 -POSTHOOK: query: DROP TABLE test_table3_n8 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@test_table3_n8 -POSTHOOK: Output: default@test_table3_n8 -PREHOOK: query: CREATE TABLE test_table3_n8 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_table3_n8 -POSTHOOK: query: CREATE TABLE test_table3_n8 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test_table3_n8 -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') -SELECT a.key, a.value -FROM test_table1_n16 a JOIN test_table2_n15 b -ON a.key = b.key WHERE a.ds = '1' and 
b.ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1_n16 -PREHOOK: Input: default@test_table1_n16@ds=1 -PREHOOK: Input: default@test_table2_n15 -PREHOOK: Input: default@test_table2_n15@ds=1 -PREHOOK: Output: default@test_table3_n8@ds=1 -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') -SELECT a.key, a.value -FROM test_table1_n16 a JOIN test_table2_n15 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1_n16 -POSTHOOK: Input: default@test_table1_n16@ds=1 -POSTHOOK: Input: default@test_table2_n15 -POSTHOOK: Input: default@test_table2_n15@ds=1 -POSTHOOK: Output: default@test_table3_n8@ds=1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - 
alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 84 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 1302 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 14 Data size: 1302 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: no inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1302 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 1302 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3_n8 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) - outputColumnNames: key, value, ds - Statistics: Num rows: 14 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds 
(type: string) - minReductionHashAggr: 0.9285714 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 1 - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3_n8 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.test_table3_n8 - -PREHOOK: query: INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') -SELECT a.key, 
a.value -FROM test_table1_n16 a JOIN test_table2_n15 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1_n16 -PREHOOK: Input: default@test_table1_n16@ds=1 -PREHOOK: Input: default@test_table2_n15 -PREHOOK: Input: default@test_table2_n15@ds=1 -PREHOOK: Output: default@test_table3_n8@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table3_n8 PARTITION (ds = '1') -SELECT a.key, a.value -FROM test_table1_n16 a JOIN test_table2_n15 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1_n16 -POSTHOOK: Input: default@test_table1_n16@ds=1 -POSTHOOK: Input: default@test_table2_n15 -POSTHOOK: Input: default@test_table2_n15@ds=1 -POSTHOOK: Output: default@test_table3_n8@ds=1 -POSTHOOK: Lineage: test_table3_n8 PARTITION(ds=1).key SIMPLE [(test_table1_n16)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3_n8 PARTITION(ds=1).value SIMPLE [(test_table1_n16)a.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: select * from test_table3_n8 tablesample (bucket 1 out of 2) s where ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table3_n8 -PREHOOK: Input: default@test_table3_n8@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select * from test_table3_n8 tablesample (bucket 1 out of 2) s where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table3_n8 -POSTHOOK: Input: default@test_table3_n8@ds=1 -#### A masked pattern was here #### -0 val_0 1 -0 val_0 1 -0 val_0 1 -0 val_0 1 -0 val_0 1 -0 val_0 1 -0 val_0 1 -0 val_0 1 -0 val_0 1 -2 val_2 1 -4 val_4 1 -8 val_8 1 -9 val_9 1 -PREHOOK: query: select * from test_table3_n8 tablesample (bucket 2 out of 2) s where ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table3_n8 -PREHOOK: Input: default@test_table3_n8@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select * from test_table3_n8 tablesample (bucket 2 out 
of 2) s where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table3_n8 -POSTHOOK: Input: default@test_table3_n8@ds=1 -#### A masked pattern was here #### -5 val_5 1 -5 val_5 1 -5 val_5 1 -5 val_5 1 -5 val_5 1 -5 val_5 1 -5 val_5 1 -5 val_5 1 -5 val_5 1 -PREHOOK: query: DROP TABLE test_table3_n8 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@test_table3_n8 -PREHOOK: Output: default@test_table3_n8 -POSTHOOK: query: DROP TABLE test_table3_n8 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@test_table3_n8 -POSTHOOK: Output: default@test_table3_n8 diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_5.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_5.q.out deleted file mode 100644 index ca137dc2cd..0000000000 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_5.q.out +++ /dev/null @@ -1,540 +0,0 @@ -PREHOOK: query: CREATE TABLE test_table1_n8 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_table1_n8 -POSTHOOK: query: CREATE TABLE test_table1_n8 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test_table1_n8 -PREHOOK: query: CREATE TABLE test_table2_n8 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_table2_n8 -POSTHOOK: query: CREATE TABLE test_table2_n8 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test_table2_n8 -PREHOOK: query: CREATE TABLE test_table3_n5 (key INT, value STRING) PARTITIONED BY 
(ds STRING) -CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_table3_n5 -POSTHOOK: query: CREATE TABLE test_table3_n5 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test_table3_n5 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1_n8 PARTITION (ds = '1') SELECT * where key < 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table1_n8@ds=1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1_n8 PARTITION (ds = '1') SELECT * where key < 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table1_n8@ds=1 -POSTHOOK: Lineage: test_table1_n8 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table1_n8 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table2_n8 PARTITION (ds = '1') SELECT * where key < 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table2_n8@ds=1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table2_n8 PARTITION (ds = '1') SELECT * where key < 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table2_n8@ds=1 -POSTHOOK: Lineage: test_table2_n8 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2_n8 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') -SELECT a.key, concat(a.value, b.value) -FROM test_table1_n8 a JOIN test_table2_n8 b -ON a.key = b.key WHERE a.ds = '1' 
and b.ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1_n8 -PREHOOK: Input: default@test_table1_n8@ds=1 -PREHOOK: Input: default@test_table2_n8 -PREHOOK: Input: default@test_table2_n8@ds=1 -PREHOOK: Output: default@test_table3_n5@ds=1 -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') -SELECT a.key, concat(a.value, b.value) -FROM test_table1_n8 a JOIN test_table2_n8 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1_n8 -POSTHOOK: Input: default@test_table1_n8@ds=1 -POSTHOOK: Input: default@test_table2_n8 -POSTHOOK: Input: default@test_table2_n8@ds=1 -POSTHOOK: Output: default@test_table3_n5@ds=1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - 
TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 2562 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: - - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3_n5 - Select Operator - expressions: _col0 (type: int), _col1 (type: 
string), '1' (type: string) - outputColumnNames: key, value, ds - Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string) - minReductionHashAggr: 0.9285714 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 1 - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3_n5 
- - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.test_table3_n5 - -PREHOOK: query: INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') -SELECT a.key, concat(a.value, b.value) -FROM test_table1_n8 a JOIN test_table2_n8 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1_n8 -PREHOOK: Input: default@test_table1_n8@ds=1 -PREHOOK: Input: default@test_table2_n8 -PREHOOK: Input: default@test_table2_n8@ds=1 -PREHOOK: Output: default@test_table3_n5@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') -SELECT a.key, concat(a.value, b.value) -FROM test_table1_n8 a JOIN test_table2_n8 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1_n8 -POSTHOOK: Input: default@test_table1_n8@ds=1 -POSTHOOK: Input: default@test_table2_n8 -POSTHOOK: Input: default@test_table2_n8@ds=1 -POSTHOOK: Output: default@test_table3_n5@ds=1 -POSTHOOK: Lineage: test_table3_n5 PARTITION(ds=1).key SIMPLE [(test_table1_n8)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3_n5 PARTITION(ds=1).value EXPRESSION [(test_table1_n8)a.FieldSchema(name:value, type:string, comment:null), (test_table2_n8)b.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: select * from test_table3_n5 tablesample (bucket 1 out of 2) s where ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table3_n5 -PREHOOK: Input: default@test_table3_n5@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select * from test_table3_n5 tablesample (bucket 1 out of 2) s where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table3_n5 -POSTHOOK: Input: default@test_table3_n5@ds=1 -#### A masked pattern was here #### -2 val_2val_2 1 -PREHOOK: query: select * from test_table3_n5 tablesample (bucket 2 out of 2) s where ds = '1' 
-PREHOOK: type: QUERY -PREHOOK: Input: default@test_table3_n5 -PREHOOK: Input: default@test_table3_n5@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select * from test_table3_n5 tablesample (bucket 2 out of 2) s where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table3_n5 -POSTHOOK: Input: default@test_table3_n5@ds=1 -#### A masked pattern was here #### -9 val_9val_9 1 -8 val_8val_8 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -4 val_4val_4 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') -SELECT a.key, concat(a.value, b.value) -FROM -(select key, value from test_table1_n8 where ds = '1') a -JOIN -(select key, value from test_table2_n8 where ds = '1') b -ON a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1_n8 -PREHOOK: Input: default@test_table1_n8@ds=1 -PREHOOK: Input: default@test_table2_n8 -PREHOOK: Input: default@test_table2_n8@ds=1 -PREHOOK: Output: default@test_table3_n5@ds=1 -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') -SELECT a.key, concat(a.value, b.value) -FROM -(select key, value from test_table1_n8 where ds = '1') a -JOIN -(select key, value from test_table2_n8 where ds = '1') b -ON a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1_n8 -POSTHOOK: Input: default@test_table1_n8@ds=1 -POSTHOOK: Input: default@test_table2_n8 -POSTHOOK: Input: default@test_table2_n8@ds=1 -POSTHOOK: Output: default@test_table3_n5@ds=1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 
<- Map 1 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: test_table1_n8 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: test_table2_n8 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 2562 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 2632 Basic stats: 
COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: - - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3_n5 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) - outputColumnNames: key, value, ds - Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string) - minReductionHashAggr: 0.9285714 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string) - mode: 
mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 1 - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3_n5 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.test_table3_n5 - -PREHOOK: query: INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') -SELECT a.key, concat(a.value, b.value) -FROM -(select key, value from test_table1_n8 where ds = '1') a -JOIN -(select key, value from test_table2_n8 where ds = '1') b -ON a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1_n8 -PREHOOK: Input: default@test_table1_n8@ds=1 -PREHOOK: Input: default@test_table2_n8 -PREHOOK: Input: default@test_table2_n8@ds=1 -PREHOOK: Output: default@test_table3_n5@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table3_n5 PARTITION (ds = '1') -SELECT a.key, concat(a.value, b.value) -FROM -(select key, value from test_table1_n8 where ds = '1') a -JOIN -(select key, value from test_table2_n8 where ds = '1') b -ON a.key = b.key -POSTHOOK: type: 
QUERY -POSTHOOK: Input: default@test_table1_n8 -POSTHOOK: Input: default@test_table1_n8@ds=1 -POSTHOOK: Input: default@test_table2_n8 -POSTHOOK: Input: default@test_table2_n8@ds=1 -POSTHOOK: Output: default@test_table3_n5@ds=1 -POSTHOOK: Lineage: test_table3_n5 PARTITION(ds=1).key SIMPLE [(test_table1_n8)test_table1_n8.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3_n5 PARTITION(ds=1).value EXPRESSION [(test_table1_n8)test_table1_n8.FieldSchema(name:value, type:string, comment:null), (test_table2_n8)test_table2_n8.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: select * from test_table3_n5 tablesample (bucket 1 out of 2) s where ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table3_n5 -PREHOOK: Input: default@test_table3_n5@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select * from test_table3_n5 tablesample (bucket 1 out of 2) s where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table3_n5 -POSTHOOK: Input: default@test_table3_n5@ds=1 -#### A masked pattern was here #### -2 val_2val_2 1 -PREHOOK: query: select * from test_table3_n5 tablesample (bucket 2 out of 2) s where ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table3_n5 -PREHOOK: Input: default@test_table3_n5@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select * from test_table3_n5 tablesample (bucket 2 out of 2) s where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table3_n5 -POSTHOOK: Input: default@test_table3_n5@ds=1 -#### A masked pattern was here #### -9 val_9val_9 1 -8 val_8val_8 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -5 val_5val_5 1 -4 val_4val_4 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 -0 val_0val_0 1 diff --git 
a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_8.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_8.q.out deleted file mode 100644 index 11d17af1e0..0000000000 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_8.q.out +++ /dev/null @@ -1,530 +0,0 @@ -PREHOOK: query: CREATE TABLE test_table1_n2 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_table1_n2 -POSTHOOK: query: CREATE TABLE test_table1_n2 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test_table1_n2 -PREHOOK: query: CREATE TABLE test_table2_n2 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_table2_n2 -POSTHOOK: query: CREATE TABLE test_table2_n2 (key INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test_table2_n2 -PREHOOK: query: CREATE TABLE test_table3_n2 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_table3_n2 -POSTHOOK: query: CREATE TABLE test_table3_n2 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING) -CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test_table3_n2 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1_n2 PARTITION (ds = '1') SELECT * where key < 10 -PREHOOK: type: QUERY -PREHOOK: 
Input: default@src -PREHOOK: Output: default@test_table1_n2@ds=1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1_n2 PARTITION (ds = '1') SELECT * where key < 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table1_n2@ds=1 -POSTHOOK: Lineage: test_table1_n2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table1_n2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table2_n2 PARTITION (ds = '1') SELECT * where key < 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table2_n2@ds=1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table2_n2 PARTITION (ds = '1') SELECT * where key < 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table2_n2@ds=1 -POSTHOOK: Lineage: test_table2_n2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2_n2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') -SELECT a.key, b.key, concat(a.value, b.value) -FROM test_table1_n2 a JOIN test_table2_n2 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1_n2 -PREHOOK: Input: default@test_table1_n2@ds=1 -PREHOOK: Input: default@test_table2_n2 -PREHOOK: Input: default@test_table2_n2@ds=1 -PREHOOK: Output: default@test_table3_n2@ds=1 -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') -SELECT a.key, b.key, concat(a.value, b.value) -FROM test_table1_n2 a JOIN test_table2_n2 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1_n2 -POSTHOOK: Input: 
default@test_table1_n2@ds=1 -POSTHOOK: Input: default@test_table2_n2 -POSTHOOK: Input: default@test_table2_n2@ds=1 -POSTHOOK: Output: default@test_table3_n2@ds=1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - 
keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: string) - Execution mode: llap - LLAP IO: no inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3_n2 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) - outputColumnNames: key, key2, value, ds - Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string) - minReductionHashAggr: 0.9285714 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: 
COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 1 - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3_n2 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, key2, value - Column Types: int, int, string - Table: default.test_table3_n2 - -PREHOOK: query: INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') -SELECT a.key, b.key, concat(a.value, b.value) -FROM test_table1_n2 a JOIN 
test_table2_n2 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1_n2 -PREHOOK: Input: default@test_table1_n2@ds=1 -PREHOOK: Input: default@test_table2_n2 -PREHOOK: Input: default@test_table2_n2@ds=1 -PREHOOK: Output: default@test_table3_n2@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') -SELECT a.key, b.key, concat(a.value, b.value) -FROM test_table1_n2 a JOIN test_table2_n2 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1_n2 -POSTHOOK: Input: default@test_table1_n2@ds=1 -POSTHOOK: Input: default@test_table2_n2 -POSTHOOK: Input: default@test_table2_n2@ds=1 -POSTHOOK: Output: default@test_table3_n2@ds=1 -POSTHOOK: Lineage: test_table3_n2 PARTITION(ds=1).key SIMPLE [(test_table1_n2)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3_n2 PARTITION(ds=1).key2 SIMPLE [(test_table2_n2)b.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3_n2 PARTITION(ds=1).value EXPRESSION [(test_table1_n2)a.FieldSchema(name:value, type:string, comment:null), (test_table2_n2)b.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: select * from test_table3_n2 tablesample (bucket 1 out of 2) s where ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table3_n2 -PREHOOK: Input: default@test_table3_n2@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select * from test_table3_n2 tablesample (bucket 1 out of 2) s where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table3_n2 -POSTHOOK: Input: default@test_table3_n2@ds=1 -#### A masked pattern was here #### -2 2 val_2val_2 1 -PREHOOK: query: select * from test_table3_n2 tablesample (bucket 2 out of 2) s where ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table3_n2 -PREHOOK: Input: default@test_table3_n2@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: 
select * from test_table3_n2 tablesample (bucket 2 out of 2) s where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table3_n2 -POSTHOOK: Input: default@test_table3_n2@ds=1 -#### A masked pattern was here #### -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -4 4 val_4val_4 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -8 8 val_8val_8 1 -9 9 val_9val_9 1 -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') -SELECT b.key, a.key, concat(a.value, b.value) -FROM test_table1_n2 a JOIN test_table2_n2 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1_n2 -PREHOOK: Input: default@test_table1_n2@ds=1 -PREHOOK: Input: default@test_table2_n2 -PREHOOK: Input: default@test_table2_n2@ds=1 -PREHOOK: Output: default@test_table3_n2@ds=1 -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') -SELECT b.key, a.key, concat(a.value, b.value) -FROM test_table1_n2 a JOIN test_table2_n2 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1_n2 -POSTHOOK: Input: default@test_table1_n2@ds=1 -POSTHOOK: Input: default@test_table2_n2 -POSTHOOK: Input: default@test_table2_n2@ds=1 -POSTHOOK: Output: default@test_table3_n2@ds=1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 2 <- Map 1 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - 
filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: int), _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: 
Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: string) - Execution mode: llap - LLAP IO: no inputs - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3_n2 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) - outputColumnNames: key, key2, value, ds - Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string) - minReductionHashAggr: 0.9285714 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: 
_col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 1 - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3_n2 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, key2, value - Column Types: int, int, string - Table: default.test_table3_n2 - -PREHOOK: query: INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') -SELECT b.key, a.key, concat(a.value, b.value) -FROM test_table1_n2 a JOIN test_table2_n2 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1_n2 -PREHOOK: Input: default@test_table1_n2@ds=1 -PREHOOK: Input: default@test_table2_n2 -PREHOOK: Input: default@test_table2_n2@ds=1 -PREHOOK: Output: default@test_table3_n2@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table3_n2 PARTITION (ds = '1') -SELECT b.key, a.key, concat(a.value, b.value) -FROM test_table1_n2 a JOIN test_table2_n2 b -ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1_n2 -POSTHOOK: Input: 
default@test_table1_n2@ds=1 -POSTHOOK: Input: default@test_table2_n2 -POSTHOOK: Input: default@test_table2_n2@ds=1 -POSTHOOK: Output: default@test_table3_n2@ds=1 -POSTHOOK: Lineage: test_table3_n2 PARTITION(ds=1).key SIMPLE [(test_table2_n2)b.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3_n2 PARTITION(ds=1).key2 SIMPLE [(test_table1_n2)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3_n2 PARTITION(ds=1).value EXPRESSION [(test_table1_n2)a.FieldSchema(name:value, type:string, comment:null), (test_table2_n2)b.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: select * from test_table3_n2 tablesample (bucket 1 out of 2) s where ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table3_n2 -PREHOOK: Input: default@test_table3_n2@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select * from test_table3_n2 tablesample (bucket 1 out of 2) s where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table3_n2 -POSTHOOK: Input: default@test_table3_n2@ds=1 -#### A masked pattern was here #### -2 2 val_2val_2 1 -PREHOOK: query: select * from test_table3_n2 tablesample (bucket 2 out of 2) s where ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table3_n2 -PREHOOK: Input: default@test_table3_n2@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select * from test_table3_n2 tablesample (bucket 2 out of 2) s where ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table3_n2 -POSTHOOK: Input: default@test_table3_n2@ds=1 -#### A masked pattern was here #### -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -0 0 val_0val_0 1 -4 4 val_4val_4 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -5 5 val_5val_5 1 -8 8 val_8val_8 1 -9 9 val_9val_9 1 diff 
--git a/ql/src/test/results/clientpositive/llap/case_sensitivity.q.out b/ql/src/test/results/clientpositive/llap/case_sensitivity.q.out deleted file mode 100644 index 27f1c15a54..0000000000 --- a/ql/src/test/results/clientpositive/llap/case_sensitivity.q.out +++ /dev/null @@ -1,138 +0,0 @@ -PREHOOK: query: CREATE TABLE DEST1_n129(Key INT, VALUE STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@DEST1_n129 -POSTHOOK: query: CREATE TABLE DEST1_n129(Key INT, VALUE STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@DEST1_n129 -PREHOOK: query: EXPLAIN -FROM SRC_THRIFT -INSERT OVERWRITE TABLE dest1_n129 SELECT src_Thrift.LINT[1], src_thrift.lintstring[0].MYSTRING where src_thrift.liNT[0] > 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_thrift -PREHOOK: Output: default@dest1_n129 -POSTHOOK: query: EXPLAIN -FROM SRC_THRIFT -INSERT OVERWRITE TABLE dest1_n129 SELECT src_Thrift.LINT[1], src_thrift.lintstring[0].MYSTRING where src_thrift.liNT[0] > 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_thrift -POSTHOOK: Output: default@dest1_n129 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src_thrift - filterExpr: (lint[0] > 0) (type: boolean) - Statistics: Num rows: 11 Data size: 29480 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (lint[0] > 0) (type: boolean) - Statistics: Num rows: 3 Data size: 8039 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: lint[1] (type: int), lintstring[0].mystring (type: string) - outputColumnNames: _col0, _col1 - Statistics: 
Num rows: 3 Data size: 8039 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 8039 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n129 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 3 Data size: 8039 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3544 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 3544 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3560 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n129 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest1_n129 - -PREHOOK: query: FROM SRC_THRIFT -INSERT OVERWRITE TABLE dest1_n129 SELECT src_Thrift.LINT[1], src_thrift.lintstring[0].MYSTRING where src_thrift.liNT[0] > 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_thrift -PREHOOK: Output: default@dest1_n129 -POSTHOOK: query: FROM SRC_THRIFT -INSERT OVERWRITE TABLE dest1_n129 SELECT src_Thrift.LINT[1], src_thrift.lintstring[0].MYSTRING where src_thrift.liNT[0] > 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_thrift -POSTHOOK: Output: default@dest1_n129 -POSTHOOK: Lineage: dest1_n129.key EXPRESSION [(src_thrift)src_thrift.FieldSchema(name:lint, type:array, comment:from deserializer), ] -POSTHOOK: Lineage: dest1_n129.value EXPRESSION [(src_thrift)src_thrift.FieldSchema(name:lintstring, type:array>, comment:from deserializer), ] -PREHOOK: query: SELECT DEST1_n129.* FROM Dest1_n129 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n129 -#### A masked pattern was here #### -POSTHOOK: query: SELECT DEST1_n129.* FROM Dest1_n129 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n129 -#### A masked pattern was here #### -2 1 -4 8 -6 27 -8 64 -10 125 -12 216 -14 343 -16 512 -18 729 diff --git a/ql/src/test/results/clientpositive/llap/cast1.q.out b/ql/src/test/results/clientpositive/llap/cast1.q.out deleted file mode 100644 index 1e2217c75b..0000000000 --- a/ql/src/test/results/clientpositive/llap/cast1.q.out +++ /dev/null @@ -1,131 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n151(c1 INT, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 INT, c6 STRING, c7 INT) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n151 -POSTHOOK: query: CREATE TABLE dest1_n151(c1 INT, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 INT, c6 STRING, c7 INT) STORED AS 
TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n151 -PREHOOK: query: EXPLAIN -FROM src INSERT OVERWRITE TABLE dest1_n151 SELECT 3 + 2, 3.0 + 2, 3 + 2.0, 3.0 + 2.0, 3 + CAST(2.0 AS INT) + CAST(CAST(0 AS SMALLINT) AS INT), CAST(1 AS BOOLEAN), CAST(TRUE AS INT) WHERE src.key = 86 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n151 -POSTHOOK: query: EXPLAIN -FROM src INSERT OVERWRITE TABLE dest1_n151 SELECT 3 + 2, 3.0 + 2, 3 + 2.0, 3.0 + 2.0, 3 + CAST(2.0 AS INT) + CAST(CAST(0 AS SMALLINT) AS INT), CAST(1 AS BOOLEAN), CAST(TRUE AS INT) WHERE src.key = 86 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n151 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) = 86.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) = 86.0D) (type: boolean) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 5 (type: int), 5.0D (type: double), 5.0D (type: double), 5.0D (type: double), 5 (type: int), 'TRUE' (type: string), 1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 250 Data size: 31000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 31000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n151 - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: string), _col6 (type: int) - outputColumnNames: c1, c2, c3, c4, c5, c6, c7 - Statistics: Num rows: 250 Data size: 31000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 2984 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 2984 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n151 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1, c2, c3, c4, c5, c6, c7 - Column Types: int, double, double, double, int, string, int - Table: default.dest1_n151 - -PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n151 SELECT 3 + 2, 3.0 + 2, 3 + 2.0, 3.0 + 2.0, 3 + CAST(2.0 AS INT) + CAST(CAST(0 AS SMALLINT) AS INT), CAST(1 AS BOOLEAN), CAST(TRUE AS INT) WHERE src.key = 86 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n151 -POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n151 SELECT 3 + 2, 3.0 + 2, 3 + 2.0, 3.0 + 2.0, 3 + CAST(2.0 AS INT) + CAST(CAST(0 AS SMALLINT) AS INT), CAST(1 AS BOOLEAN), CAST(TRUE AS INT) WHERE src.key = 86 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n151 -POSTHOOK: Lineage: dest1_n151.c1 SIMPLE [] -POSTHOOK: Lineage: dest1_n151.c2 EXPRESSION [] -POSTHOOK: Lineage: dest1_n151.c3 EXPRESSION [] -POSTHOOK: Lineage: dest1_n151.c4 EXPRESSION [] -POSTHOOK: Lineage: dest1_n151.c5 SIMPLE [] -POSTHOOK: Lineage: dest1_n151.c6 EXPRESSION [] -POSTHOOK: Lineage: dest1_n151.c7 SIMPLE [] -PREHOOK: query: select dest1_n151.* FROM dest1_n151 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n151 -#### A masked pattern was here #### -POSTHOOK: query: select dest1_n151.* FROM dest1_n151 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n151 -#### A masked pattern was here #### -5 5.0 5.0 5.0 5 TRUE 1 diff --git a/ql/src/test/results/clientpositive/llap/cast_on_constant.q.out b/ql/src/test/results/clientpositive/llap/cast_on_constant.q.out deleted 
file mode 100644 index c3f1386399..0000000000 --- a/ql/src/test/results/clientpositive/llap/cast_on_constant.q.out +++ /dev/null @@ -1,146 +0,0 @@ -PREHOOK: query: create table t1_n138(ts_field timestamp, date_field date) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t1_n138 -POSTHOOK: query: create table t1_n138(ts_field timestamp, date_field date) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t1_n138 -PREHOOK: query: explain select * from t1_n138 where ts_field = "2016-01-23 00:00:00" -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n138 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from t1_n138 where ts_field = "2016-01-23 00:00:00" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n138 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: t1_n138 - filterExpr: (ts_field = TIMESTAMP'2016-01-23 00:00:00') (type: boolean) - Filter Operator - predicate: (ts_field = TIMESTAMP'2016-01-23 00:00:00') (type: boolean) - Select Operator - expressions: TIMESTAMP'2016-01-23 00:00:00' (type: timestamp), date_field (type: date) - outputColumnNames: _col0, _col1 - ListSink - -PREHOOK: query: explain select * from t1_n138 where date_field = "2016-01-23" -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n138 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from t1_n138 where date_field = "2016-01-23" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n138 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: t1_n138 - filterExpr: (date_field = DATE'2016-01-23') (type: boolean) - Filter Operator - predicate: (date_field = DATE'2016-01-23') (type: boolean) - Select Operator - 
expressions: ts_field (type: timestamp), DATE'2016-01-23' (type: date) - outputColumnNames: _col0, _col1 - ListSink - -PREHOOK: query: explain select * from t1_n138 where ts_field = timestamp '2016-01-23 00:00:00' -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n138 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from t1_n138 where ts_field = timestamp '2016-01-23 00:00:00' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n138 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: t1_n138 - filterExpr: (ts_field = TIMESTAMP'2016-01-23 00:00:00') (type: boolean) - Filter Operator - predicate: (ts_field = TIMESTAMP'2016-01-23 00:00:00') (type: boolean) - Select Operator - expressions: TIMESTAMP'2016-01-23 00:00:00' (type: timestamp), date_field (type: date) - outputColumnNames: _col0, _col1 - ListSink - -PREHOOK: query: explain select * from t1_n138 where date_field = date '2016-01-23' -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n138 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from t1_n138 where date_field = date '2016-01-23' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n138 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: t1_n138 - filterExpr: (date_field = DATE'2016-01-23') (type: boolean) - Filter Operator - predicate: (date_field = DATE'2016-01-23') (type: boolean) - Select Operator - expressions: ts_field (type: timestamp), DATE'2016-01-23' (type: date) - outputColumnNames: _col0, _col1 - ListSink - -PREHOOK: query: explain select * from t1_n138 where date_field = ts_field -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n138 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from t1_n138 where date_field = 
ts_field -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n138 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: t1_n138 - filterExpr: (CAST( date_field AS TIMESTAMP) = ts_field) (type: boolean) - Filter Operator - predicate: (CAST( date_field AS TIMESTAMP) = ts_field) (type: boolean) - Select Operator - expressions: ts_field (type: timestamp), date_field (type: date) - outputColumnNames: _col0, _col1 - ListSink - -PREHOOK: query: drop table t1_n138 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@t1_n138 -PREHOOK: Output: default@t1_n138 -POSTHOOK: query: drop table t1_n138 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@t1_n138 -POSTHOOK: Output: default@t1_n138 diff --git a/ql/src/test/results/clientpositive/llap/cbo_SortUnionTransposeRule.q.out b/ql/src/test/results/clientpositive/llap/cbo_SortUnionTransposeRule.q.out deleted file mode 100644 index b3fa3b78db..0000000000 --- a/ql/src/test/results/clientpositive/llap/cbo_SortUnionTransposeRule.q.out +++ /dev/null @@ -1,1256 +0,0 @@ -PREHOOK: query: create table s_n3 as select * from src limit 10 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@s_n3 -POSTHOOK: query: create table s_n3 as select * from src limit 10 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@s_n3 -POSTHOOK: Lineage: s_n3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: s_n3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@s_n3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select key from s_n3 a -union all 
-select key from s_n3 b -order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@s_n3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 4 <- Union 2 (CONTAINS) - Reducer 3 <- Union 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -limit 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@s_n3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -limit 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@s_n3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 0 - Processor Tree: - ListSink - -PREHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -limit 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@s_n3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -limit 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@s_n3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - table: 
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink - -PREHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -order by key -limit 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@s_n3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -order by key -limit 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@s_n3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 4 <- Union 2 (CONTAINS) - Reducer 3 <- Union 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 870 Basic stats: 
COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink - -PREHOOK: query: explain -select * from( -select src1.key, src2.value -from src src1 left outer join src src2 -on src1.key = src2.key -limit 10)subq1 -union all -select * from( -select src1.key, src2.value -from src src1 left outer join src src2 -on src1.key = src2.key -limit 10)subq2 -limit 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select * from( -select src1.key, src2.value -from src src1 left outer join src src2 -on src1.key = src2.key -limit 10)subq1 -union all -select * from( -select src1.key, src2.value -from src src1 left outer join src src2 -on src1.key = src2.key -limit 10)subq2 -limit 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE), Union 4 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column 
stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column 
stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE - 
TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 4 - Vertex: Union 4 - - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink - -PREHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@s_n3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@s_n3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 4 <- Union 2 (CONTAINS) - Reducer 3 <- Union 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 
Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -limit 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@s_n3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -limit 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@s_n3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch 
Operator - limit: 0 - Processor Tree: - ListSink - -PREHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -limit 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@s_n3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -limit 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@s_n3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column 
stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink - -PREHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -order by key -limit 5 -PREHOOK: type: QUERY 
-PREHOOK: Input: default@s_n3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -order by key -limit 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@s_n3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) - null sort order: z - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) - null sort order: z - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: 
Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE 
- Top N Key Operator - sort order: + - keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink - -PREHOOK: query: explain -select * from( -select src1.key, src2.value -from src src1 left outer join src src2 -on src1.key = src2.key -limit 10)subq1 -union all -select * from( -select src1.key, src2.value -from src src1 left outer join src src2 -on src1.key = src2.key -limit 10)subq2 -limit 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select * from( -select src1.key, src2.value -from src src1 left outer join src src2 -on src1.key = src2.key -limit 10)subq1 -union all -select * from( -select src1.key, src2.value -from src src1 left outer join src src2 -on src1.key = src2.key -limit 10)subq2 -limit 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 5 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator 
- predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 12 Data size: 1772 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 
_col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1772 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 12 Data size: 1772 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1772 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output 
Operator - null sort order: - sort order: - Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 799 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Union 4 - Vertex: Union 4 - - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: 
- ListSink - -PREHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -limit 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@s_n3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -limit 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@s_n3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE 
Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink - -PREHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -limit 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@s_n3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select key from s_n3 a -union all -select key from s_n3 b -limit 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@s_n3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Select 
Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/cbo_input26.q.out b/ql/src/test/results/clientpositive/llap/cbo_input26.q.out deleted file mode 100644 index 25651469d0..0000000000 --- a/ql/src/test/results/clientpositive/llap/cbo_input26.q.out +++ /dev/null @@ -1,642 +0,0 @@ -PREHOOK: query: explain -select * from ( - select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa - union all - select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: explain -select * from ( - select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa - union all - select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez 
-#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: ((ds = '2008-04-08') and (hr = '11')) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) - Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) - Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value 
expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: unknown - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 2148 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 2148 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '14' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 254 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 
(type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 2148 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 2148 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from ( - select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa - union all - select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from ( - select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa - union all - select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -0 val_0 2008-04-08 11 -0 val_0 2008-04-08 11 -0 val_0 2008-04-08 11 -10 val_10 2008-04-08 11 -100 val_100 2008-04-08 11 -PREHOOK: query: explain -select * from ( - select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa - union all - select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 
-#### A masked pattern was here #### -POSTHOOK: query: explain -select * from ( - select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa - union all - select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: ((ds = '2008-04-08') and (hr = '11')) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) - Statistics: Num rows: 1 Data size: 452 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) - Statistics: Num rows: 1 Data size: 452 Basic 
stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: unknown - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), '11' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: '2008-04-08' (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - 
Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), '14' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: '2008-04-08' (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from ( - select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa - union all - select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from ( - select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa - union all - select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -2008-04-08 0 11 -2008-04-08 0 11 -2008-04-08 0 11 -2008-04-08 10 11 -2008-04-08 100 11 -PREHOOK: query: explain -select * from ( - select * from (select a.ds, a.key, a.hr 
from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa - union all - select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: explain -select * from ( - select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa - union all - select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: ((ds = '2008-04-08') and (hr = '11')) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - 
Map Operator Tree: - TableScan - alias: b - filterExpr: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) - Statistics: Num rows: 1 Data size: 452 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) - Statistics: Num rows: 1 Data size: 452 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: unknown - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), '11' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: '2008-04-08' (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), '14' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: '2008-04-08' (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from ( - select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa - union all - select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from ( - select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa - union all - select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 
5)pb -)subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -2008-04-08 0 11 -2008-04-08 0 11 -2008-04-08 0 11 -2008-04-08 10 11 -2008-04-08 100 11 -PREHOOK: query: explain -select * from ( - select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa - union all - select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: explain -select * from ( - select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa - union all - select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: ((ds = '2008-04-08') and (hr = '11')) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: 
COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) - Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) - Statistics: Num rows: 1 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: unknown - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), '2008-04-08' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1632 Basic 
stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), '2008-04-08' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from ( - select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa - union all - select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from ( - select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa - union all - select * from (select b.key, b.ds, b.value from srcpart b where 
b.ds = '2008-04-08' and b.hr = '14' limit 5)pb -)subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -238 2008-04-08 val_238 -86 2008-04-08 val_86 -311 2008-04-08 val_311 -27 2008-04-08 val_27 -165 2008-04-08 val_165 diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_annotate_stats_groupby.q.out deleted file mode 100644 index d34933f31f..0000000000 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_annotate_stats_groupby.q.out +++ /dev/null @@ -1,1564 +0,0 @@ -PREHOOK: query: create table if not exists loc_staging_n1 ( - state string, - locid int, - zip bigint, - year int -) row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@loc_staging_n1 -POSTHOOK: query: create table if not exists loc_staging_n1 ( - state string, - locid int, - zip bigint, - year int -) row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@loc_staging_n1 -PREHOOK: query: create table loc_orc_n1 like loc_staging_n1 -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@loc_orc_n1 -POSTHOOK: query: create table loc_orc_n1 like loc_staging_n1 -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@loc_orc_n1 -PREHOOK: query: alter table loc_orc_n1 set fileformat orc -PREHOOK: type: ALTERTABLE_FILEFORMAT -PREHOOK: Input: default@loc_orc_n1 -PREHOOK: Output: default@loc_orc_n1 -POSTHOOK: query: alter table loc_orc_n1 set fileformat orc -POSTHOOK: type: ALTERTABLE_FILEFORMAT -POSTHOOK: Input: default@loc_orc_n1 -POSTHOOK: Output: default@loc_orc_n1 -PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table 
loc_staging_n1 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@loc_staging_n1 -POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging_n1 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@loc_staging_n1 -PREHOOK: query: insert overwrite table loc_orc_n1 select * from loc_staging_n1 -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_staging_n1 -PREHOOK: Output: default@loc_orc_n1 -POSTHOOK: query: insert overwrite table loc_orc_n1 select * from loc_staging_n1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_staging_n1 -POSTHOOK: Output: default@loc_orc_n1 -POSTHOOK: Lineage: loc_orc_n1.locid SIMPLE [(loc_staging_n1)loc_staging_n1.FieldSchema(name:locid, type:int, comment:null), ] -POSTHOOK: Lineage: loc_orc_n1.state SIMPLE [(loc_staging_n1)loc_staging_n1.FieldSchema(name:state, type:string, comment:null), ] -POSTHOOK: Lineage: loc_orc_n1.year SIMPLE [(loc_staging_n1)loc_staging_n1.FieldSchema(name:year, type:int, comment:null), ] -POSTHOOK: Lineage: loc_orc_n1.zip SIMPLE [(loc_staging_n1)loc_staging_n1.FieldSchema(name:zip, type:bigint, comment:null), ] -PREHOOK: query: explain select * from loc_orc_n1 -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from loc_orc_n1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: loc_orc_n1 - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - outputColumnNames: state, locid, zip, year - ListSink - -PREHOOK: query: analyze table loc_orc_n1 compute statistics for columns state -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@loc_orc_n1 -PREHOOK: Output: default@loc_orc_n1 -#### A 
masked pattern was here #### -POSTHOOK: query: analyze table loc_orc_n1 compute statistics for columns state -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@loc_orc_n1 -POSTHOOK: Output: default@loc_orc_n1 -#### A masked pattern was here #### -PREHOOK: query: explain select a, c, min(b) -from ( select state as a, locid as b, count(*) as c - from loc_orc_n1 - group by state,locid - ) sq1 -group by a,c -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select a, c, min(b) -from ( select state as a, locid as b, count(*) as c - from loc_orc_n1 - group by state,locid - ) sq1 -group by a,c -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: sq1:loc_orc_n1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: state (type: string), locid (type: int) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution 
mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid, $f2 - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(locid) - keys: state (type: string), $f2 (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: state, $f2, $f2_0 - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: analyze table loc_orc_n1 compute statistics for columns state,locid,year -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@loc_orc_n1 -PREHOOK: Output: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: analyze table loc_orc_n1 compute 
statistics for columns state,locid,year -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@loc_orc_n1 -POSTHOOK: Output: default@loc_orc_n1 -#### A masked pattern was here #### -PREHOOK: query: explain select year from loc_orc_n1 group by year -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select year from loc_orc_n1 group by year -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: year (type: int) - outputColumnNames: year - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: year (type: int) - minReductionHashAggr: 0.75 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: year - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num 
rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 5632 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 32 Data size: 5632 Basic stats: COMPLETE 
Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with rollup -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with rollup -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 4224 Basic stats: COMPLETE 
Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 24 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by rollup (state,locid) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by rollup (state,locid) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 
8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 24 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state)) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) 
-#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 1408 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 1408 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid)) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by 
state,locid grouping sets((state),(locid)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 2816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 16 Data size: 2816 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: 
Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid),()) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid),()) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 24 Data size: 4224 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE - 
File Output Operator - compressed: false - Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state,locid),(state),(locid),()) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state,locid),(state),(locid),()) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 5632 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 32 Data size: 5632 Basic stats: 
COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select year from loc_orc_n1 group by year -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select year from loc_orc_n1 group by year -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: year (type: int) - outputColumnNames: year - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: year (type: int) - minReductionHashAggr: 0.875 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce 
partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: year - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: 
Num rows: 16 Data size: 2816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 16 Data size: 2816 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,zip from loc_orc_n1 group by state,zip -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,zip from loc_orc_n1 group by state,zip -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), zip (type: bigint) - outputColumnNames: state, zip 
- Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: state (type: string), zip (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: state, zip - Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 
1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with rollup -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with rollup -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked 
pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by rollup 
(state,locid) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by rollup (state,locid) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state)) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, 
locid - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid)) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num 
rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid),()) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state),(locid),()) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 
- Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state,locid),(state),(locid),()) -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid grouping sets((state,locid),(state),(locid),()) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - 
Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select year from loc_orc_n1 group by year -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select year from loc_orc_n1 group by year -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: 
- Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: year (type: int) - outputColumnNames: year - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: year (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: year - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube -PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -POSTHOOK: query: explain select state,locid from loc_orc_n1 group by state,locid with cube -POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc_n1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE 
PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: loc_orc_n1 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: state (type: string), locid (type: int), '0L' (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: state, locid - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join0.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join0.q.out deleted file mode 100644 index 
cdb09525f5..0000000000 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join0.q.out +++ /dev/null @@ -1,256 +0,0 @@ -Warning: Shuffle Join MERGEJOIN[16][tables = [a:cbo_t1:cbo_t3, a:cbo_t2:cbo_t3]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: explain -select sum(hash(a.k1,a.v1,a.k2, a.v2)) -from ( -SELECT cbo_t1.key as k1, cbo_t1.value as v1, - cbo_t2.key as k2, cbo_t2.value as v2 FROM - (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t1 - JOIN - (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t2 - SORT BY k1, v1, k2, v2 -) a -PREHOOK: type: QUERY -PREHOOK: Input: default@cbo_t3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select sum(hash(a.k1,a.v1,a.k2, a.v2)) -from ( -SELECT cbo_t1.key as k1, cbo_t1.value as v1, - cbo_t2.key as k2, cbo_t2.value as v2 FROM - (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t1 - JOIN - (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t2 - SORT BY k1, v1, k2, v2 -) a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@cbo_t3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a:cbo_t1:cbo_t3 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 20 Data size: 3230 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - 
Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: a:cbo_t2:cbo_t3 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 20 Data size: 3230 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: key, value, key0, value0 - Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(key,value,key0,value0) (type: int) - outputColumnNames: $f0 - Statistics: Num rows: 36 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum($f0) - minReductionHashAggr: 0.9722222 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - 
mode: mergepartial - outputColumnNames: $f0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[16][tables = [a:cbo_t1:cbo_t3, a:cbo_t2:cbo_t3]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: explain -select sum(hash(a.k1,a.v1,a.k2, a.v2)) -from ( -SELECT cbo_t1.key as k1, cbo_t1.value as v1, - cbo_t2.key as k2, cbo_t2.value as v2 FROM - (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t1 - JOIN - (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t2 - SORT BY k1, v1, k2, v2 -) a -PREHOOK: type: QUERY -PREHOOK: Input: default@cbo_t3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select sum(hash(a.k1,a.v1,a.k2, a.v2)) -from ( -SELECT cbo_t1.key as k1, cbo_t1.value as v1, - cbo_t2.key as k2, cbo_t2.value as v2 FROM - (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t1 - JOIN - (SELECT * FROM cbo_t3 WHERE cbo_t3.key < 10) cbo_t2 - SORT BY k1, v1, k2, v2 -) a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@cbo_t3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a:cbo_t1:cbo_t3 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 20 Data size: 3230 Basic stats: COMPLETE Column stats: COMPLETE - 
Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: a:cbo_t2:cbo_t3 - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 20 Data size: 3230 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: key, value, key0, value0 - Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(key,value,key0,value0) (type: int) - outputColumnNames: $f0 - Statistics: Num rows: 36 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum($f0) - minReductionHashAggr: 0.9722222 - mode: hash - 
outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: $f0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join17.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join17.q.out deleted file mode 100644 index d28a10406d..0000000000 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_auto_join17.q.out +++ /dev/null @@ -1,173 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n112(key1 INT, value1 STRING, key2 INT, value2 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n112 -POSTHOOK: query: CREATE TABLE dest1_n112(key1 INT, value1 STRING, key2 INT, value2 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n112 -PREHOOK: query: explain -FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n112 SELECT src1.*, src2.* -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n112 -POSTHOOK: query: explain -FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT 
OVERWRITE TABLE dest1_n112 SELECT src1.*, src2.* -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n112 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: key, value, key0, value0 - input vertices: - 1 Map 3 - Statistics: Num rows: 791 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), UDFToInteger(key0) (type: int), value0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n112 - Select Operator - expressions: 
_col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) - outputColumnNames: key1, value1, key2, value2 - Statistics: Num rows: 791 Data size: 150290 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 
- Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n112 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key1, value1, key2, value2 - Column Types: int, string, int, string - Table: default.dest1_n112 - -PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n112 SELECT src1.*, src2.* -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n112 -POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest1_n112 SELECT src1.*, src2.* -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n112 -POSTHOOK: Lineage: dest1_n112.key1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n112.key2 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n112.value1 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: 
dest1_n112.value2 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT sum(hash(dest1_n112.key1,dest1_n112.value1,dest1_n112.key2,dest1_n112.value2)) FROM dest1_n112 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n112 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(dest1_n112.key1,dest1_n112.value1,dest1_n112.key2,dest1_n112.value2)) FROM dest1_n112 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n112 -#### A masked pattern was here #### --793937029770 diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_cross_product_check_2.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_cross_product_check_2.q.out deleted file mode 100644 index dc5cd23f3c..0000000000 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_cross_product_check_2.q.out +++ /dev/null @@ -1,688 +0,0 @@ -PREHOOK: query: create table A_n18 (key string, value string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@A_n18 -POSTHOOK: query: create table A_n18 (key string, value string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@A_n18 -PREHOOK: query: insert into A_n18 -select * from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@a_n18 -POSTHOOK: query: insert into A_n18 -select * from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@a_n18 -POSTHOOK: Lineage: a_n18.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: a_n18.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table B_n14 (key string, value string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@B_n14 -POSTHOOK: query: create table B_n14 (key string, value string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@B_n14 
-PREHOOK: query: insert into B_n14 -select * from src order by key -limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@b_n14 -POSTHOOK: query: insert into B_n14 -select * from src order by key -limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@b_n14 -POSTHOOK: Lineage: b_n14.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: b_n14.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -Warning: Shuffle Join MERGEJOIN[8][tables = [a_n18, b_n14]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: explain select * from A_n18 join B_n14 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n18 -PREHOOK: Input: default@b_n14 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from A_n18 join B_n14 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n18 -POSTHOOK: Input: default@b_n14 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a_n18 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b_n14 - Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column 
stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: key, value, key0, value0 - Statistics: Num rows: 5000 Data size: 1770000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5000 Data size: 1770000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, a_n18]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: explain select * from B_n14 d1 join B_n14 d2 on d1.key = d2.key join A_n18 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n18 -PREHOOK: Input: default@b_n14 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from B_n14 d1 join B_n14 d2 on d1.key = d2.key join A_n18 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n18 -POSTHOOK: Input: default@b_n14 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) -#### A masked pattern was here #### 
- Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: d1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: key, value, key0, value0 - input vertices: - 1 Map 3 - Statistics: Num rows: 20 Data size: 7040 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 20 Data size: 7040 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: d2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 
4 - Map Operator Tree: - TableScan - alias: a_n18 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: key, value, key0, value0, key1, value1 - Statistics: Num rows: 10000 Data size: 5300000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10000 Data size: 5300000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[34][tables = [a_n18, ]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: explain select * from A_n18 join - (select d1.key - from B_n14 d1 join B_n14 d2 on d1.key = d2.key - where 1 = 1 group by d1.key) od1 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n18 -PREHOOK: Input: default@b_n14 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from A_n18 join - (select d1.key - from B_n14 d1 join B_n14 d2 on d1.key = d2.key - where 1 = 1 group by d1.key) od1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n18 -POSTHOOK: Input: default@b_n14 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a 
root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 3 <- Map 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a_n18 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: od1:d1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: key - input vertices: - 1 Map 5 - Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string) - minReductionHashAggr: 0.75 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 430 
Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: od1:d2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: key, value, key0 - Statistics: Num rows: 2500 Data size: 660000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2500 Data size: 660000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: key - Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: 
COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[17][tables = [od1:d1, od1:d2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[18][tables = [a_n18, ]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: explain select * from A_n18 join (select d1.key from B_n14 d1 join B_n14 d2 where 1 = 1 group by d1.key) od1 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n18 -PREHOOK: Input: default@b_n14 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from A_n18 join (select d1.key from B_n14 d1 join B_n14 d2 where 1 = 1 group by d1.key) od1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n18 -POSTHOOK: Input: default@b_n14 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) - Reducer 4 <- Map 3 (XPROD_EDGE), Map 6 (XPROD_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a_n18 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: 
od1:d1 - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: od1:d2 - Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: key, value, key0 - Statistics: Num rows: 2500 Data size: 660000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2500 Data size: 660000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: key - Statistics: Num rows: 100 Data size: 8600 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 100 Data size: 8600 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key 
(type: string) - minReductionHashAggr: 0.95 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: key - Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[37][tables = [, ]] in Stage 'Reducer 3' is a cross product -PREHOOK: query: explain select * from -(select A_n18.key from A_n18 group by key) ss join -(select d1.key from B_n14 d1 join B_n14 d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n18 -PREHOOK: Input: default@b_n14 -#### A masked pattern was here #### -POSTHOOK: query: explain select * from -(select A_n18.key from A_n18 group by key) ss join -(select d1.key from B_n14 d1 join B_n14 d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n18 -POSTHOOK: Input: default@b_n14 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 4 <- Map 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) -#### A masked pattern 
was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: ss:a_n18 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: od1:d1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: key - input vertices: - 1 Map 6 - Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string) - minReductionHashAggr: 0.75 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - 
key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: od1:d2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: key - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: key, key0 - Statistics: Num rows: 1250 Data size: 216250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1250 Data size: 216250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: key - Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_gby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_gby2_map_multi_distinct.q.out deleted file mode 100644 index 462186a104..0000000000 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_gby2_map_multi_distinct.q.out +++ /dev/null @@ -1,348 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n166(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n166 -POSTHOOK: query: CREATE TABLE dest1_n166(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n166 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n166 -SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) -GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n166 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n166 -SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) 
-GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n166 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) - outputColumnNames: $f0, $f1, $f2 - Statistics: Num rows: 500 Data size: 131000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT $f1), sum($f1), sum(DISTINCT $f1), count($f2) - keys: $f0 (type: string), $f1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: double), _col5 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: $f0, $f1, $f2, $f3, $f4 - Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE - Select 
Operator - expressions: $f0 (type: string), UDFToInteger($f1) (type: int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), UDFToInteger($f4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n166 - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) - outputColumnNames: key, c1, c2, c3, c4 - Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 
_col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n166 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, c1, c2, c3, c4 - Column Types: string, int, string, int, int - Table: default.dest1_n166 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n166 -SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) -GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n166 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n166 -SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) -GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n166 -POSTHOOK: Lineage: dest1_n166.c1 EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: dest1_n166.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), 
(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n166.c3 EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: dest1_n166.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n166.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n166.* FROM dest1_n166 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n166 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n166.* FROM dest1_n166 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n166 -#### A masked pattern was here #### -0 1 00.0 0 3 -1 71 116414.0 10044 115 -2 69 225571.0 15780 111 -3 62 332004.0 20119 99 -4 74 452763.0 30965 124 -5 6 5397.0 278 10 -6 5 6398.0 331 6 -7 6 7735.0 447 10 -8 8 8762.0 595 10 -9 7 91047.0 577 12 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n166 -SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) -GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n166 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n166 -SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) -GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n166 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan 
- alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) - outputColumnNames: $f0, $f1, $f2 - Statistics: Num rows: 500 Data size: 131000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT $f0), sum($f1), sum(DISTINCT $f1), count($f2) - keys: $f0 (type: string), $f1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: double), _col5 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: $f0, $f1, $f2, $f3, $f4 - Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: $f0 (type: string), UDFToInteger($f1) (type: int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), UDFToInteger($f4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n166 - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) - outputColumnNames: key, c1, c2, c3, c4 - Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n166 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, c1, c2, c3, c4 - Column Types: string, int, string, int, int - Table: default.dest1_n166 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n166 -SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) -GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n166 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n166 -SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) -GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n166 -POSTHOOK: Lineage: dest1_n166.c1 EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: dest1_n166.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n166.c3 EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: dest1_n166.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n166.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n166.* FROM dest1_n166 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n166 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n166.* FROM 
dest1_n166 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n166 -#### A masked pattern was here #### -0 1 00.0 0 3 -1 1 116414.0 10044 115 -2 1 225571.0 15780 111 -3 1 332004.0 20119 99 -4 1 452763.0 30965 124 -5 1 5397.0 278 10 -6 1 6398.0 331 6 -7 1 7735.0 447 10 -8 1 8762.0 595 10 -9 1 91047.0 577 12 diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_join1.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_join1.q.out deleted file mode 100644 index d3a6c1c9f0..0000000000 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_join1.q.out +++ /dev/null @@ -1,476 +0,0 @@ -PREHOOK: query: CREATE TABLE myinput1_n0(key int, value int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@myinput1_n0 -POSTHOOK: query: CREATE TABLE myinput1_n0(key int, value int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@myinput1_n0 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_n0 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@myinput1_n0 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_n0 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@myinput1_n0 -Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND b.key = 40 -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND b.key = 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: 
- Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: int), (key = 40) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: int), (key = 40) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - filter predicates: - 0 {VALUE._col2} - 1 {VALUE._col2} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col0,_col1,_col3,_col4) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Group 
By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND b.key = 40 -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND b.key = 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -4939870 -Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.value = 40 AND a.key = a.value AND b.key = 40 -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n0 -#### A masked pattern 
was here #### -POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.value = 40 AND a.key = a.value AND b.key = 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: int), (key = 40) (type: boolean), (value = 40) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: int), (key = 40) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: 
- Merge Join Operator - condition map: - Full Outer Join 0 to 1 - filter predicates: - 0 {VALUE._col2} {VALUE._col3} - 1 {VALUE._col2} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col4, _col5 - Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col0,_col1,_col4,_col5) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.key = a.value AND b.key = 40 -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND 
a.key = a.value AND b.key = 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -4939870 -Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.key = b.key AND b.key = 40 -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.key = b.key AND b.key = 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: int), (key = 40) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: int), (key = 40) (type: boolean) - outputColumnNames: _col0, _col1, _col2 
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - filter predicates: - 0 {VALUE._col2} - 1 {VALUE._col2} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col0,_col1,_col3,_col4) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross 
product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.key = b.key AND b.key = 40 -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key = 40 AND a.key = b.key AND b.key = 40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -4939870 -Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: int), (key > 40) (type: boolean), (value > 50) (type: boolean), (key = value) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: int), (key > 40) (type: boolean), (value > 50) (type: boolean), (key = value) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - filter predicates: - 0 {VALUE._col2} {VALUE._col3} {VALUE._col4} - 1 {VALUE._col2} {VALUE._col3} {VALUE._col4} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col0,_col1,_col5,_col6) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE 
- value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n0 a FULL OUTER JOIN myinput1_n0 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n0 -#### A masked pattern was here #### -4939870 diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_outer_join_ppr.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_outer_join_ppr.q.out deleted file mode 100644 index f22770ad47..0000000000 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_outer_join_ppr.q.out +++ /dev/null @@ -1,666 +0,0 @@ -PREHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: 
QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: value (type: string) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: 
-#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - 
Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 111 Data size: 30192 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 30192 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - value expressions: value (type: string) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - 
column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: key, value, key0, value0 - Position of Big Table: 1 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string) - outputColumnNames: key, value, key0, value0 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,key0,value0 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND 
b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 -PREHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A 
masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) < 20.0D) and (UDFToDouble(key) > 15.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: value (type: string) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### 
A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - filterExpr: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0D) and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 111 Data size: 30192 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 30192 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - value expressions: value (type: string) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: 
-#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: key, value, key0, value0 - Position of Big Table: 1 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string) - outputColumnNames: key, 
value, key0, value0 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,key0,value0 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 
val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 diff --git a/ql/src/test/results/clientpositive/llap/cbo_stats_estimation.q.out b/ql/src/test/results/clientpositive/llap/cbo_stats_estimation.q.out deleted file mode 100644 index ccd9f4e5a0..0000000000 --- a/ql/src/test/results/clientpositive/llap/cbo_stats_estimation.q.out +++ /dev/null @@ -1,296 +0,0 @@ -PREHOOK: query: CREATE TABLE claims(claim_rec_id bigint, claim_invoice_num string, typ_c int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@claims -POSTHOOK: query: CREATE TABLE claims(claim_rec_id bigint, claim_invoice_num string, typ_c int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@claims -PREHOOK: query: ALTER TABLE claims UPDATE STATISTICS set ('numRows'='1154941534','rawDataSize'='1135307527922') -PREHOOK: type: ALTERTABLE_UPDATETABLESTATS -PREHOOK: Input: default@claims -PREHOOK: Output: default@claims -POSTHOOK: query: ALTER TABLE claims UPDATE STATISTICS set ('numRows'='1154941534','rawDataSize'='1135307527922') -POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS -POSTHOOK: Input: default@claims -POSTHOOK: Output: default@claims -PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 -PREHOOK: type: QUERY -PREHOOK: Input: default@claims -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@claims -#### A masked pattern was here #### -OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` -FROM `default`.`claims` -WHERE `typ_c` = 3 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - 
alias: claims - filterExpr: (typ_c = 3) (type: boolean) - Statistics: Num rows: 1154941534 Data size: 1135307527922 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (typ_c = 3) (type: boolean) - Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: claims - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns claim_rec_id,claim_invoice_num,typ_c - columns.comments - columns.types bigint:string:int -#### A masked pattern was here #### - name default.claims - numFiles 0 - numRows 1154941534 - rawDataSize 1135307527922 - serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - 
bucketing_version 2 - column.name.delimiter , - columns claim_rec_id,claim_invoice_num,typ_c - columns.comments - columns.types bigint:string:int -#### A masked pattern was here #### - name default.claims - numFiles 0 - numRows 1154941534 - rawDataSize 1135307527922 - serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.claims - name: default.claims - Truncated Path -> Alias: - /claims [claims] - Reducer 2 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 -PREHOOK: type: QUERY -PREHOOK: Input: default@claims -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) 
FROM claims WHERE typ_c=3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@claims -#### A masked pattern was here #### -OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` -FROM `default`.`claims` -WHERE `typ_c` = 3 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: claims - filterExpr: (typ_c = 3) (type: boolean) - Statistics: Num rows: 1154941534 Data size: 1135307527922 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (typ_c = 3) (type: boolean) - Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: claims - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns claim_rec_id,claim_invoice_num,typ_c - columns.comments - columns.types bigint:string:int -#### A masked pattern was here #### - name default.claims - numFiles 0 - numRows 
1154941534 - rawDataSize 1135307527922 - serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns claim_rec_id,claim_invoice_num,typ_c - columns.comments - columns.types bigint:string:int -#### A masked pattern was here #### - name default.claims - numFiles 0 - numRows 1154941534 - rawDataSize 1135307527922 - serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.claims - name: default.claims - Truncated Path -> Alias: - /claims [claims] - Reducer 2 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/cbo_union_view.q.out b/ql/src/test/results/clientpositive/llap/cbo_union_view.q.out deleted file mode 100644 index 6f86ced9a2..0000000000 --- a/ql/src/test/results/clientpositive/llap/cbo_union_view.q.out +++ /dev/null @@ -1,284 +0,0 @@ -PREHOOK: query: CREATE TABLE src_union_1 (key int, value string) PARTITIONED BY (ds string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_union_1 -POSTHOOK: query: CREATE TABLE src_union_1 (key int, value string) PARTITIONED BY (ds string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_union_1 -PREHOOK: query: CREATE TABLE src_union_2 (key int, value string) PARTITIONED BY (ds string, part_1 string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_union_2 -POSTHOOK: query: CREATE TABLE src_union_2 (key int, value string) PARTITIONED BY (ds string, part_1 string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_union_2 -PREHOOK: query: CREATE TABLE src_union_3 (key int, value string) PARTITIONED BY (ds string, part_1 string, part_2 string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_union_3 -POSTHOOK: query: CREATE TABLE src_union_3 (key int, value string) PARTITIONED BY (ds string, part_1 string, part_2 string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_union_3 -PREHOOK: query: CREATE VIEW src_union_view PARTITIONED ON (ds) as -SELECT key, value, ds FROM ( -SELECT key, value, ds FROM src_union_1 -UNION ALL 
-SELECT key, value, ds FROM src_union_2 -UNION ALL -SELECT key, value, ds FROM src_union_3 -) subq -PREHOOK: type: CREATEVIEW -PREHOOK: Input: default@src_union_1 -PREHOOK: Input: default@src_union_2 -PREHOOK: Input: default@src_union_3 -PREHOOK: Output: database:default -PREHOOK: Output: default@src_union_view -POSTHOOK: query: CREATE VIEW src_union_view PARTITIONED ON (ds) as -SELECT key, value, ds FROM ( -SELECT key, value, ds FROM src_union_1 -UNION ALL -SELECT key, value, ds FROM src_union_2 -UNION ALL -SELECT key, value, ds FROM src_union_3 -) subq -POSTHOOK: type: CREATEVIEW -POSTHOOK: Input: default@src_union_1 -POSTHOOK: Input: default@src_union_2 -POSTHOOK: Input: default@src_union_3 -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_union_view -POSTHOOK: Lineage: src_union_view.key EXPRESSION [(src_union_1)src_union_1.FieldSchema(name:key, type:int, comment:null), (src_union_2)src_union_2.FieldSchema(name:key, type:int, comment:null), (src_union_3)src_union_3.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: src_union_view.value EXPRESSION [(src_union_1)src_union_1.FieldSchema(name:value, type:string, comment:null), (src_union_2)src_union_2.FieldSchema(name:value, type:string, comment:null), (src_union_3)src_union_3.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_union_1 -PREHOOK: Input: default@src_union_2 -PREHOOK: Input: default@src_union_3 -PREHOOK: Input: default@src_union_view -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_union_1 -POSTHOOK: Input: default@src_union_2 -POSTHOOK: Input: default@src_union_3 -POSTHOOK: Input: default@src_union_view -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: 
Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) - Map 4 <- Union 2 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src_union_1 - filterExpr: (key = 86) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = 86) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string), ds (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: unknown - Map 3 - Map Operator Tree: - TableScan - alias: src_union_2 - filterExpr: (key = 86) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = 86) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string), ds (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) - 
outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: unknown - Map 4 - Map Operator Tree: - TableScan - alias: src_union_3 - filterExpr: (key = 86) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = 86) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string), ds (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 86 (type: int), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: unknown - Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='1' -PREHOOK: type: QUERY -PREHOOK: Input: default@src_union_1 -PREHOOK: Input: default@src_union_2 -PREHOOK: Input: default@src_union_3 -PREHOOK: 
Input: default@src_union_view -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN SELECT key, value, ds FROM src_union_view WHERE key=86 AND ds ='1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_union_1 -POSTHOOK: Input: default@src_union_2 -POSTHOOK: Input: default@src_union_3 -POSTHOOK: Input: default@src_union_view -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) - Map 4 <- Union 2 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src_union_1 - filterExpr: ((key = 86) and (ds = '1')) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key = 86) and (ds = '1')) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 86 (type: int), _col0 (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 519 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 519 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: unknown - Map 3 - Map Operator Tree: - TableScan - alias: src_union_2 - filterExpr: ((key = 86) and (ds = '1')) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: 
COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key = 86) and (ds = '1')) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 86 (type: int), _col0 (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 519 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 519 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: unknown - Map 4 - Map Operator Tree: - TableScan - alias: src_union_3 - filterExpr: ((key = 86) and (ds = '1')) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key = 86) and (ds = '1')) (type: boolean) - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 86 (type: int), _col0 (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 519 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 519 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: unknown - Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/check_constraint.q.out b/ql/src/test/results/clientpositive/llap/check_constraint.q.out index e4fe16427f..bc5d361859 100644 --- a/ql/src/test/results/clientpositive/llap/check_constraint.q.out +++ b/ql/src/test/results/clientpositive/llap/check_constraint.q.out @@ -2070,10 +2070,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_uami_n0 - filterExpr: ((de) IN (103, 119) and enforce_constraint((893.14 >= CAST( i AS decimal(5,2))) is not false)) (type: boolean) + filterExpr: (de) IN (103, 119) (type: boolean) Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((de) IN (103, 119) and enforce_constraint((893.14 >= CAST( i AS decimal(5,2))) is not false)) (type: boolean) + predicate: (de) IN (103, 119) (type: boolean) Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct), i (type: int), vc (type: varchar(128)) @@ -2095,15 +2095,18 @@ STAGE PLANS: expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 893.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Filter Operator + predicate: enforce_constraint((_col2 is not null and (_col2 >= CAST( _col1 AS decimal(5,2))) is not false)) (type: boolean) Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: 
default.acid_uami_n0 - Write Type: UPDATE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acid_uami_n0 + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -2199,18 +2202,25 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 893.14 (type: decimal(5,2)), 'apache_hive' (type: varchar(128)) + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 893.14 (type: decimal(5,2)), 'apache_hive' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Filter Operator + predicate: enforce_constraint(_col2 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid_uami_n0 - Write Type: UPDATE + Select Operator + expressions: _col0 (type: struct), _col1 (type: int), _col2 (type: decimal(5,2)), CAST( _col3 AS varchar(128)) (type: varchar(128)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + 
name: default.acid_uami_n0 + Write Type: UPDATE Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/clusterctas.q.out b/ql/src/test/results/clientpositive/llap/clusterctas.q.out deleted file mode 100644 index 40ceee215f..0000000000 --- a/ql/src/test/results/clientpositive/llap/clusterctas.q.out +++ /dev/null @@ -1,145 +0,0 @@ -PREHOOK: query: EXPLAIN -CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS -SELECT * FROM SRC x CLUSTER BY x.key -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@x -POSTHOOK: query: EXPLAIN -CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS -SELECT * FROM SRC x CLUSTER BY x.key -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@x -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no 
inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.x - Write Type: INSERT - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: col1, col2 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-4 - 
Create Table - columns: key string, value string - name: default.x - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde name: org.apache.hadoop.hive.ql.io.orc.OrcSerde - table properties: - transactional true - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.x - - Stage: Stage-0 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Write Type: INSERT - -PREHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS -SELECT * FROM SRC x CLUSTER BY x.key -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@x -POSTHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS -SELECT * FROM SRC x CLUSTER BY x.key -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@x -POSTHOOK: Lineage: x.key SIMPLE [(src)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: x.value SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: DROP TABLE x -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@x -PREHOOK: Output: default@x -POSTHOOK: query: DROP TABLE x -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@x -POSTHOOK: Output: default@x diff --git a/ql/src/test/results/clientpositive/llap/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/llap/columnstats_partlvl.q.out deleted file mode 100644 index 1495781287..0000000000 --- a/ql/src/test/results/clientpositive/llap/columnstats_partlvl.q.out +++ /dev/null @@ -1,1047 +0,0 @@ -PREHOOK: query: DROP TABLE Employee_Part -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE Employee_Part -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE 
Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double) -row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@Employee_Part -POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double) -row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@Employee_Part -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary=2000.0) -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary=2000.0) -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0 -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary=4000.0) -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary=4000.0) -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=4000.0 -PREHOOK: query: explain -analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=2000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: default@employee_part@employeesalary=2000.0 -#### A masked pattern was here #### 
-POSTHOOK: query: explain -analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=2000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: employee_part - filterExpr: (employeesalary = 2000.0D) (type: boolean) - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: employeeid (type: int) - outputColumnNames: employeeid - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: compute_stats(employeeid, 'hll') - keys: 2000.0D (type: double) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: 2000.0D (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: 2000.0D (type: double) - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - keys: 2000.0D (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: struct), 2000.0D (type: double) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: employeeID - Column Types: int - Table: default.employee_part - -PREHOOK: query: explain extended -analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=2000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: default@employee_part@employeesalary=2000.0 -#### A masked pattern was here #### -POSTHOOK: query: explain extended -analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=2000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: employee_part - filterExpr: (employeesalary = 2000.0D) (type: boolean) - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - Statistics Aggregation Key Prefix: default.employee_part/ - GatherStats: true - Select Operator - expressions: employeeid 
(type: int) - outputColumnNames: employeeid - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: compute_stats(employeeid, 'hll') - keys: 2000.0D (type: double) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: 2000.0D (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: 2000.0D (type: double) - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - tag: -1 - value expressions: _col1 (type: struct) - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: employeesalary=2000.0 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - employeesalary 2000.0 - properties: - bucket_count -1 - column.name.delimiter , - columns employeeid,employeename - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.employee_part - numFiles 1 - numRows 0 - partition_columns employeesalary - partition_columns.types double - rawDataSize 0 - serialization.ddl struct employee_part { i32 employeeid, string employeename} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 105 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns employeeid,employeename - columns.comments - columns.types int:string 
- field.delim | -#### A masked pattern was here #### - name default.employee_part - partition_columns employeesalary - partition_columns.types double - serialization.ddl struct employee_part { i32 employeeid, string employeename} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.employee_part - name: default.employee_part - Truncated Path -> Alias: - /employee_part/employeesalary=2000.0 [employee_part] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - keys: 2000.0D (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: struct), 2000.0D (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types struct:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Stats Aggregation Key Prefix: default.employee_part/ - Column Stats Desc: - Columns: 
employeeID - Column Types: int - Table: default.employee_part - Is Table Level Stats: false - -PREHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=2000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: default@employee_part@employeesalary=2000.0 -#### A masked pattern was here #### -POSTHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=2000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0 -#### A masked pattern was here #### -PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@employee_part -POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@employee_part -# col_name data_type comment -employeeid int -employeename string - -# Partition Information -# col_name data_type comment -employeesalary double - -# Detailed Partition Information -Partition Value: [2000.0] -Database: default -Table: employee_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"employeeid\":\"true\"}} - numFiles 1 - numRows 13 - rawDataSize 92 - totalSize 105 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - 
field.delim | - serialization.format | -PREHOOK: query: explain -analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=4000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -POSTHOOK: query: explain -analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=4000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: employee_part - filterExpr: (employeesalary = 4000.0D) (type: boolean) - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: employeeid (type: int) - outputColumnNames: employeeid - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: compute_stats(employeeid, 'hll') - keys: 4000.0D (type: double) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: 4000.0D (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: 4000.0D (type: double) - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 
(type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - keys: 4000.0D (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: struct), 4000.0D (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: employeeID - Column Types: int - Table: default.employee_part - -PREHOOK: query: explain extended -analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=4000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -POSTHOOK: query: explain extended -analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=4000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here 
#### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: employee_part - filterExpr: (employeesalary = 4000.0D) (type: boolean) - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - Statistics Aggregation Key Prefix: default.employee_part/ - GatherStats: true - Select Operator - expressions: employeeid (type: int) - outputColumnNames: employeeid - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: compute_stats(employeeid, 'hll') - keys: 4000.0D (type: double) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: 4000.0D (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: 4000.0D (type: double) - Statistics: Num rows: 3 Data size: 1062 Basic stats: PARTIAL Column stats: NONE - tag: -1 - value expressions: _col1 (type: struct) - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: employeesalary=4000.0 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - employeesalary 4000.0 - properties: - bucket_count -1 - column.name.delimiter , - columns employeeid,employeename - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.employee_part - numFiles 1 - numRows 0 - partition_columns employeesalary - partition_columns.types double - rawDataSize 0 - serialization.ddl struct employee_part { i32 employeeid, string employeename} - serialization.format | - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 105 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns employeeid,employeename - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.employee_part - partition_columns employeesalary - partition_columns.types double - serialization.ddl struct employee_part { i32 employeeid, string employeename} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.employee_part - name: default.employee_part - Truncated Path -> Alias: - /employee_part/employeesalary=4000.0 [employee_part] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - keys: 4000.0D (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: struct), 4000.0D (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 354 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types struct:double - 
escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Stats Aggregation Key Prefix: default.employee_part/ - Column Stats Desc: - Columns: employeeID - Column Types: int - Table: default.employee_part - Is Table Level Stats: false - -PREHOOK: query: analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=4000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -POSTHOOK: query: analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=4000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -PREHOOK: query: explain -analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=2000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: default@employee_part@employeesalary=2000.0 -#### A masked pattern was here #### -POSTHOOK: query: explain -analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=2000.0 -POSTHOOK: 
Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: employee_part - filterExpr: (employeesalary = 2000.0D) (type: boolean) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: employeeid (type: int), employeename (type: string) - outputColumnNames: employeeid, employeename - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') - keys: 2000.0D (type: double) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: 2000.0D (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: 2000.0D (type: double) - Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: 2000.0D (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), 2000.0D (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column 
stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: employeeid, employeename - Column Types: int, string - Table: default.employee_part - -PREHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=2000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: default@employee_part@employeesalary=2000.0 -#### A masked pattern was here #### -POSTHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=2000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0 -#### A masked pattern was here #### -PREHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeID -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@employee_part -POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeID -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@employee_part -col_name employeeID -data_type int -min 16 -max 34 -num_nulls 1 -distinct_count 12 -avg_col_len -max_col_len -num_trues -num_falses -bit_vector HL -comment from deserializer -PREHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@employee_part -POSTHOOK: query: describe formatted Employee_Part 
partition (employeeSalary=2000.0) employeeName -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@employee_part -col_name employeeName -data_type string -min -max -num_nulls 1 -distinct_count 12 -avg_col_len 4.3076923076923075 -max_col_len 6 -num_trues -num_falses -bit_vector HL -comment from deserializer -PREHOOK: query: explain -analyze table Employee_Part compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=2000.0 -PREHOOK: Input: default@employee_part@employeesalary=4000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: default@employee_part@employeesalary=2000.0 -PREHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -POSTHOOK: query: explain -analyze table Employee_Part compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=2000.0 -POSTHOOK: Input: default@employee_part@employeesalary=4000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0 -POSTHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: employee_part - Statistics: Num rows: 26 Data size: 2596 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: employeeid (type: int), employeename (type: string), employeesalary (type: double) - outputColumnNames: employeeid, employeename, employeesalary - Statistics: Num rows: 26 Data size: 2596 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: 
compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') - keys: employeesalary (type: double) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1744 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 2 Data size: 1744 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1776 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 1776 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 1776 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: employeeid, employeename - Column Types: int, string - Table: default.employee_part - -PREHOOK: query: analyze table Employee_Part compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=2000.0 -PREHOOK: Input: default@employee_part@employeesalary=4000.0 -PREHOOK: Output: default@employee_part 
-PREHOOK: Output: default@employee_part@employeesalary=2000.0 -PREHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -POSTHOOK: query: analyze table Employee_Part compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=2000.0 -POSTHOOK: Input: default@employee_part@employeesalary=4000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0 -POSTHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) employeeID -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@employee_part -POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) employeeID -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@employee_part -col_name employeeID -data_type int -min 16 -max 34 -num_nulls 1 -distinct_count 12 -avg_col_len -max_col_len -num_trues -num_falses -bit_vector HL -comment from deserializer -PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@employee_part -POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@employee_part -col_name employeeID -data_type int -min 16 -max 34 -num_nulls 1 -distinct_count 12 -avg_col_len -max_col_len -num_trues -num_falses -bit_vector HL -comment from deserializer -PREHOOK: query: explain -analyze table Employee_Part compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=2000.0 -PREHOOK: Input: default@employee_part@employeesalary=4000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: 
default@employee_part@employeesalary=2000.0 -PREHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -POSTHOOK: query: explain -analyze table Employee_Part compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=2000.0 -POSTHOOK: Input: default@employee_part@employeesalary=4000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0 -POSTHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: employee_part - Statistics: Num rows: 26 Data size: 2300 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: employeeid (type: int), employeename (type: string) - outputColumnNames: employeeid, employeename - Statistics: Num rows: 26 Data size: 2300 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: employeeid, employeename - Column Types: int, string - Table: default.employee_part - -PREHOOK: query: analyze table Employee_Part compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=2000.0 -PREHOOK: Input: default@employee_part@employeesalary=4000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: default@employee_part@employeesalary=2000.0 -PREHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -POSTHOOK: query: analyze table Employee_Part compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=2000.0 -POSTHOOK: Input: default@employee_part@employeesalary=4000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0 -POSTHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -PREHOOK: query: describe formatted Employee_Part employeeID -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@employee_part -POSTHOOK: query: describe formatted Employee_Part employeeID -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@employee_part -col_name employeeID -data_type int -min 16 -max 34 -num_nulls 2 -distinct_count 12 -avg_col_len -max_col_len -num_trues -num_falses -bit_vector HL -comment from deserializer -COLUMN_STATS_ACCURATE 
{\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} -PREHOOK: query: create database if not exists dummydb -PREHOOK: type: CREATEDATABASE -PREHOOK: Output: database:dummydb -POSTHOOK: query: create database if not exists dummydb -POSTHOOK: type: CREATEDATABASE -POSTHOOK: Output: database:dummydb -PREHOOK: query: use dummydb -PREHOOK: type: SWITCHDATABASE -PREHOOK: Input: database:dummydb -POSTHOOK: query: use dummydb -POSTHOOK: type: SWITCHDATABASE -POSTHOOK: Input: database:dummydb -PREHOOK: query: analyze table default.Employee_Part partition (employeeSalary=2000.0) compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=2000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: default@employee_part@employeesalary=2000.0 -#### A masked pattern was here #### -POSTHOOK: query: analyze table default.Employee_Part partition (employeeSalary=2000.0) compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=2000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0 -#### A masked pattern was here #### -PREHOOK: query: describe formatted default.Employee_Part partition (employeeSalary=2000.0) employeeID -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@employee_part -POSTHOOK: query: describe formatted default.Employee_Part partition (employeeSalary=2000.0) employeeID -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@employee_part -col_name employeeID -data_type int -min 16 -max 34 -num_nulls 1 -distinct_count 12 -avg_col_len -max_col_len -num_trues -num_falses -bit_vector HL -comment from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}} -PREHOOK: query: analyze table default.Employee_Part compute statistics for columns -PREHOOK: type: 
ANALYZE_TABLE -PREHOOK: Input: default@employee_part -PREHOOK: Input: default@employee_part@employeesalary=2000.0 -PREHOOK: Input: default@employee_part@employeesalary=4000.0 -PREHOOK: Output: default@employee_part -PREHOOK: Output: default@employee_part@employeesalary=2000.0 -PREHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -POSTHOOK: query: analyze table default.Employee_Part compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@employee_part -POSTHOOK: Input: default@employee_part@employeesalary=2000.0 -POSTHOOK: Input: default@employee_part@employeesalary=4000.0 -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0 -POSTHOOK: Output: default@employee_part@employeesalary=4000.0 -#### A masked pattern was here #### -PREHOOK: query: use default -PREHOOK: type: SWITCHDATABASE -PREHOOK: Input: database:default -POSTHOOK: query: use default -POSTHOOK: type: SWITCHDATABASE -POSTHOOK: Input: database:default -PREHOOK: query: drop database dummydb -PREHOOK: type: DROPDATABASE -PREHOOK: Input: database:dummydb -PREHOOK: Output: database:dummydb -POSTHOOK: query: drop database dummydb -POSTHOOK: type: DROPDATABASE -POSTHOOK: Input: database:dummydb -POSTHOOK: Output: database:dummydb diff --git a/ql/src/test/results/clientpositive/llap/columnstats_quoting.q.out b/ql/src/test/results/clientpositive/llap/columnstats_quoting.q.out deleted file mode 100644 index 8ac436aeb7..0000000000 --- a/ql/src/test/results/clientpositive/llap/columnstats_quoting.q.out +++ /dev/null @@ -1,168 +0,0 @@ -PREHOOK: query: DROP TABLE IF EXISTS user_web_events -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS user_web_events -POSTHOOK: type: DROPTABLE -PREHOOK: query: create temporary table user_web_events(`user id` bigint, `user name` string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@user_web_events 
-POSTHOOK: query: create temporary table user_web_events(`user id` bigint, `user name` string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@user_web_events -PREHOOK: query: explain analyze table user_web_events compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@user_web_events -PREHOOK: Output: default@user_web_events -#### A masked pattern was here #### -POSTHOOK: query: explain analyze table user_web_events compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@user_web_events -POSTHOOK: Output: default@user_web_events -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: user_web_events - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: user id (type: bigint), user name (type: string) - outputColumnNames: user id, user name - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(user id, 'hll'), compute_stats(user name, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, 
_col1 - Statistics: Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1072 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: user id, user name - Column Types: bigint, string - Table: default.user_web_events - -PREHOOK: query: analyze table user_web_events compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@user_web_events -PREHOOK: Output: default@user_web_events -#### A masked pattern was here #### -POSTHOOK: query: analyze table user_web_events compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@user_web_events -POSTHOOK: Output: default@user_web_events -#### A masked pattern was here #### -PREHOOK: query: explain analyze table user_web_events compute statistics for columns `user id` -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@user_web_events -PREHOOK: Output: default@user_web_events -#### A masked pattern was here #### -POSTHOOK: query: explain analyze table user_web_events compute statistics for columns `user id` -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@user_web_events -POSTHOOK: Output: default@user_web_events -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: user_web_events - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: user id 
(type: bigint) - outputColumnNames: user id - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(user id, 'hll') - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: user id - Column Types: bigint - Table: default.user_web_events - -PREHOOK: query: analyze table user_web_events compute statistics for columns `user id` -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@user_web_events -PREHOOK: Output: default@user_web_events -#### A masked pattern was here #### -POSTHOOK: query: analyze table user_web_events compute statistics for columns `user id` -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@user_web_events -POSTHOOK: Output: default@user_web_events -#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/llap/columnstats_tbllvl.q.out deleted file mode 100644 index 
227b14ded3..0000000000 --- a/ql/src/test/results/clientpositive/llap/columnstats_tbllvl.q.out +++ /dev/null @@ -1,963 +0,0 @@ -PREHOOK: query: DROP TABLE IF EXISTS UserVisits_web_text_none -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS UserVisits_web_text_none -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE UserVisits_web_text_none ( - sourceIP string, - destURL string, - visitDate string, - adRevenue float, - userAgent string, - cCode string, - lCode string, - sKeyword string, - avgTimeOnSite int) -row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@UserVisits_web_text_none -POSTHOOK: query: CREATE TABLE UserVisits_web_text_none ( - sourceIP string, - destURL string, - visitDate string, - adRevenue float, - userAgent string, - cCode string, - lCode string, - sKeyword string, - avgTimeOnSite int) -row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@UserVisits_web_text_none -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@uservisits_web_text_none -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@uservisits_web_text_none -PREHOOK: query: explain -analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@uservisits_web_text_none -PREHOOK: Output: default@uservisits_web_text_none -#### A masked pattern was here #### -POSTHOOK: query: explain -analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue 
-POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@uservisits_web_text_none -POSTHOOK: Output: default@uservisits_web_text_none -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: uservisits_web_text_none - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) - outputColumnNames: sourceip, adrevenue, avgtimeonsite - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: sourceIP, avgTimeOnSite, adRevenue - Column Types: string, int, float - Table: default.uservisits_web_text_none - -PREHOOK: query: explain extended -analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@uservisits_web_text_none -PREHOOK: Output: default@uservisits_web_text_none -#### A masked pattern was here #### -POSTHOOK: query: explain extended -analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@uservisits_web_text_none -POSTHOOK: Output: default@uservisits_web_text_none -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: uservisits_web_text_none - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Statistics Aggregation Key Prefix: default.uservisits_web_text_none/ - GatherStats: true - Select Operator - expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) - outputColumnNames: sourceip, adrevenue, avgtimeonsite - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - 
Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: uservisits_web_text_none - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite - columns.comments - columns.types string:string:string:float:string:string:string:string:int - field.delim | -#### A masked pattern was here #### - name default.uservisits_web_text_none - numFiles 1 - numRows 0 - rawDataSize 0 - serialization.ddl struct uservisits_web_text_none { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 7060 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite - columns.comments - columns.types string:string:string:float:string:string:string:string:int - field.delim | -#### A masked pattern was here #### - name default.uservisits_web_text_none - numFiles 1 - numRows 0 - rawDataSize 0 - serialization.ddl struct 
uservisits_web_text_none { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 7060 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.uservisits_web_text_none - name: default.uservisits_web_text_none - Truncated Path -> Alias: - /uservisits_web_text_none [uservisits_web_text_none] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Stats Aggregation Key Prefix: default.uservisits_web_text_none/ - Column Stats Desc: - Columns: sourceIP, avgTimeOnSite, adRevenue - Column Types: string, int, float - Table: default.uservisits_web_text_none - Is Table Level Stats: true - -PREHOOK: 
query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@uservisits_web_text_none -PREHOOK: Output: default@uservisits_web_text_none -#### A masked pattern was here #### -POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@uservisits_web_text_none -POSTHOOK: Output: default@uservisits_web_text_none -#### A masked pattern was here #### -PREHOOK: query: explain -analyze table default.UserVisits_web_text_none compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@uservisits_web_text_none -PREHOOK: Output: default@uservisits_web_text_none -#### A masked pattern was here #### -POSTHOOK: query: explain -analyze table default.UserVisits_web_text_none compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@uservisits_web_text_none -POSTHOOK: Output: default@uservisits_web_text_none -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: uservisits_web_text_none - Statistics: Num rows: 55 Data size: 65391 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: sourceip (type: string), desturl (type: string), visitdate (type: string), adrevenue (type: float), useragent (type: string), ccode (type: string), lcode (type: string), skeyword (type: string), avgtimeonsite (type: int) - outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite - Statistics: Num rows: 55 Data size: 65391 Basic stats: COMPLETE Column stats: PARTIAL - 
Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite - Column 
Types: string, string, string, float, string, string, string, string, int - Table: default.uservisits_web_text_none - -PREHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@uservisits_web_text_none -PREHOOK: Output: default@uservisits_web_text_none -#### A masked pattern was here #### -POSTHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@uservisits_web_text_none -POSTHOOK: Output: default@uservisits_web_text_none -#### A masked pattern was here #### -PREHOOK: query: describe formatted UserVisits_web_text_none destURL -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@uservisits_web_text_none -POSTHOOK: query: describe formatted UserVisits_web_text_none destURL -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@uservisits_web_text_none -col_name destURL -data_type string -min -max -num_nulls 0 -distinct_count 55 -avg_col_len 48.945454545454545 -max_col_len 96 -num_trues -num_falses -bit_vector HL -comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} -PREHOOK: query: describe formatted UserVisits_web_text_none adRevenue -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@uservisits_web_text_none -POSTHOOK: query: describe formatted UserVisits_web_text_none adRevenue -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@uservisits_web_text_none -col_name adRevenue -data_type float -min 13.099044799804688 -max 492.98870849609375 -num_nulls 0 -distinct_count 55 -avg_col_len -max_col_len -num_trues -num_falses -bit_vector HL -comment from deserializer -COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} -PREHOOK: query: describe formatted UserVisits_web_text_none avgTimeOnSite -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@uservisits_web_text_none -POSTHOOK: query: describe formatted UserVisits_web_text_none avgTimeOnSite -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@uservisits_web_text_none -col_name avgTimeOnSite -data_type int -min 1 -max 9 -num_nulls 0 -distinct_count 9 -avg_col_len -max_col_len -num_trues -num_falses -bit_vector HL -comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} -PREHOOK: query: CREATE TABLE empty_tab( - a int, - b double, - c string, - d boolean, - e binary) -row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@empty_tab -POSTHOOK: query: CREATE TABLE empty_tab( - a int, - b double, - c string, - d boolean, - e binary) -row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@empty_tab -PREHOOK: query: explain -analyze table empty_tab compute statistics for columns a,b,c,d,e -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@empty_tab -PREHOOK: Output: default@empty_tab -#### A masked pattern was here #### -POSTHOOK: query: explain -analyze table empty_tab compute statistics for columns a,b,c,d,e -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@empty_tab -POSTHOOK: Output: default@empty_tab -#### A masked pattern was here #### -STAGE 
DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: empty_tab - Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: double), c (type: string), d (type: boolean), e (type: binary) - outputColumnNames: a, b, c, d, e - Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll'), compute_stats(d, 'hll'), compute_stats(e, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2224 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: a, b, c, d, e - Column Types: int, double, string, boolean, binary - Table: default.empty_tab - -PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@empty_tab -PREHOOK: Output: default@empty_tab -#### A masked pattern was here #### -POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@empty_tab -POSTHOOK: Output: default@empty_tab -#### A masked pattern was here #### -PREHOOK: query: create database if not exists dummydb -PREHOOK: type: CREATEDATABASE -PREHOOK: Output: database:dummydb -POSTHOOK: query: create database if not exists dummydb -POSTHOOK: type: CREATEDATABASE -POSTHOOK: Output: database:dummydb -PREHOOK: query: use dummydb -PREHOOK: type: SWITCHDATABASE -PREHOOK: Input: database:dummydb -POSTHOOK: query: use dummydb -POSTHOOK: type: SWITCHDATABASE -POSTHOOK: Input: database:dummydb -PREHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns destURL -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@uservisits_web_text_none -PREHOOK: Output: default@uservisits_web_text_none -#### A masked pattern was here #### -POSTHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns destURL -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@uservisits_web_text_none -POSTHOOK: Output: default@uservisits_web_text_none -#### A masked pattern was here #### -PREHOOK: query: describe formatted default.UserVisits_web_text_none destURL -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@uservisits_web_text_none -POSTHOOK: query: describe formatted default.UserVisits_web_text_none destURL -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@uservisits_web_text_none -col_name destURL -data_type 
string -min -max -num_nulls 0 -distinct_count 55 -avg_col_len 48.945454545454545 -max_col_len 96 -num_trues -num_falses -bit_vector HL -comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} -PREHOOK: query: CREATE TABLE UserVisits_in_dummy_db ( - sourceIP string, - destURL string, - visitDate string, - adRevenue float, - userAgent string, - cCode string, - lCode string, - sKeyword string, - avgTimeOnSite int) -row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:dummydb -PREHOOK: Output: dummydb@UserVisits_in_dummy_db -POSTHOOK: query: CREATE TABLE UserVisits_in_dummy_db ( - sourceIP string, - destURL string, - visitDate string, - adRevenue float, - userAgent string, - cCode string, - lCode string, - sKeyword string, - avgTimeOnSite int) -row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:dummydb -POSTHOOK: Output: dummydb@UserVisits_in_dummy_db -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_in_dummy_db -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: dummydb@uservisits_in_dummy_db -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_in_dummy_db -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: dummydb@uservisits_in_dummy_db -PREHOOK: query: use default -PREHOOK: type: SWITCHDATABASE -PREHOOK: Input: database:default -POSTHOOK: query: use default -POSTHOOK: type: SWITCHDATABASE -POSTHOOK: Input: database:default -PREHOOK: query: explain -analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue 
-PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: dummydb@uservisits_in_dummy_db -PREHOOK: Output: dummydb@uservisits_in_dummy_db -#### A masked pattern was here #### -POSTHOOK: query: explain -analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: dummydb@uservisits_in_dummy_db -POSTHOOK: Output: dummydb@uservisits_in_dummy_db -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: uservisits_in_dummy_db - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) - outputColumnNames: sourceip, adrevenue, avgtimeonsite - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: 
COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: sourceIP, avgTimeOnSite, adRevenue - Column Types: string, int, float - Table: dummydb.uservisits_in_dummy_db - -PREHOOK: query: explain extended -analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: dummydb@uservisits_in_dummy_db -PREHOOK: Output: dummydb@uservisits_in_dummy_db -#### A masked pattern was here #### -POSTHOOK: query: explain extended -analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: dummydb@uservisits_in_dummy_db -POSTHOOK: Output: dummydb@uservisits_in_dummy_db -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: uservisits_in_dummy_db - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Statistics Aggregation Key Prefix: dummydb.uservisits_in_dummy_db/ - GatherStats: true - Select Operator - expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) - outputColumnNames: sourceip, adrevenue, avgtimeonsite - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(sourceip, 
'hll'), compute_stats(avgtimeonsite, 'hll'), compute_stats(adrevenue, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: uservisits_in_dummy_db - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite - columns.comments - columns.types string:string:string:float:string:string:string:string:int - field.delim | -#### A masked pattern was here #### - name dummydb.uservisits_in_dummy_db - numFiles 1 - numRows 0 - rawDataSize 0 - serialization.ddl struct uservisits_in_dummy_db { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 7060 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite - columns.comments - columns.types 
string:string:string:float:string:string:string:string:int - field.delim | -#### A masked pattern was here #### - name dummydb.uservisits_in_dummy_db - numFiles 1 - numRows 0 - rawDataSize 0 - serialization.ddl struct uservisits_in_dummy_db { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 7060 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dummydb.uservisits_in_dummy_db - name: dummydb.uservisits_in_dummy_db - Truncated Path -> Alias: - /dummydb.db/uservisits_in_dummy_db [uservisits_in_dummy_db] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1512 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Stats Aggregation Key Prefix: 
dummydb.uservisits_in_dummy_db/ - Column Stats Desc: - Columns: sourceIP, avgTimeOnSite, adRevenue - Column Types: string, int, float - Table: dummydb.uservisits_in_dummy_db - Is Table Level Stats: true - -PREHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: dummydb@uservisits_in_dummy_db -PREHOOK: Output: dummydb@uservisits_in_dummy_db -#### A masked pattern was here #### -POSTHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: dummydb@uservisits_in_dummy_db -POSTHOOK: Output: dummydb@uservisits_in_dummy_db -#### A masked pattern was here #### -PREHOOK: query: explain -analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: dummydb@uservisits_in_dummy_db -PREHOOK: Output: dummydb@uservisits_in_dummy_db -#### A masked pattern was here #### -POSTHOOK: query: explain -analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: dummydb@uservisits_in_dummy_db -POSTHOOK: Output: dummydb@uservisits_in_dummy_db -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: uservisits_in_dummy_db - Statistics: Num rows: 55 Data size: 65391 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: sourceip (type: string), desturl (type: string), visitdate (type: string), adrevenue (type: float), useragent (type: string), ccode (type: string), lcode (type: string), skeyword (type: string), avgtimeonsite (type: 
int) - outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite - Statistics: Num rows: 55 Data size: 65391 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 3928 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite - Column Types: string, string, string, float, string, string, string, string, int - Table: dummydb.uservisits_in_dummy_db - -PREHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: dummydb@uservisits_in_dummy_db -PREHOOK: Output: dummydb@uservisits_in_dummy_db -#### A masked pattern was here #### -POSTHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: dummydb@uservisits_in_dummy_db -POSTHOOK: Output: dummydb@uservisits_in_dummy_db -#### A masked pattern was here #### -PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db destURL -PREHOOK: type: DESCTABLE -PREHOOK: Input: dummydb@uservisits_in_dummy_db -POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db destURL -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: dummydb@uservisits_in_dummy_db -col_name destURL -data_type string -min -max -num_nulls 0 -distinct_count 55 -avg_col_len 48.945454545454545 -max_col_len 96 -num_trues -num_falses -bit_vector HL -comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} -PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue -PREHOOK: type: DESCTABLE -PREHOOK: Input: dummydb@uservisits_in_dummy_db -POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: dummydb@uservisits_in_dummy_db -col_name adRevenue -data_type float -min 13.099044799804688 -max 
492.98870849609375 -num_nulls 0 -distinct_count 55 -avg_col_len -max_col_len -num_trues -num_falses -bit_vector HL -comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} -PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite -PREHOOK: type: DESCTABLE -PREHOOK: Input: dummydb@uservisits_in_dummy_db -POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: dummydb@uservisits_in_dummy_db -col_name avgTimeOnSite -data_type int -min 1 -max 9 -num_nulls 0 -distinct_count 9 -avg_col_len -max_col_len -num_trues -num_falses -bit_vector HL -comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} -PREHOOK: query: drop table dummydb.UserVisits_in_dummy_db -PREHOOK: type: DROPTABLE -PREHOOK: Input: dummydb@uservisits_in_dummy_db -PREHOOK: Output: dummydb@uservisits_in_dummy_db -POSTHOOK: query: drop table dummydb.UserVisits_in_dummy_db -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: dummydb@uservisits_in_dummy_db -POSTHOOK: Output: dummydb@uservisits_in_dummy_db -PREHOOK: query: drop database dummydb -PREHOOK: type: DROPDATABASE -PREHOOK: Input: database:dummydb -PREHOOK: Output: database:dummydb -POSTHOOK: query: drop database dummydb -POSTHOOK: type: DROPDATABASE -POSTHOOK: Input: database:dummydb -POSTHOOK: Output: database:dummydb diff --git a/ql/src/test/results/clientpositive/llap/comments.q.out b/ql/src/test/results/clientpositive/llap/comments.q.out deleted file mode 100644 index 8282dc728a..0000000000 --- 
a/ql/src/test/results/clientpositive/llap/comments.q.out +++ /dev/null @@ -1,311 +0,0 @@ -PREHOOK: query: select key from src limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select key from src limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -238 -PREHOOK: query: /* comment comment */ -select key from src limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: /* comment comment */ -select key from src limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -238 -PREHOOK: query: select /*comment*/ key from src limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select /*comment*/ key from src limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -238 -PREHOOK: query: select /*comment*/ key from /* comment */ src /* comment */ limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select /*comment*/ key from /* comment */ src /* comment */ limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -238 -PREHOOK: query: select /**/ key /* */ from src limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select /**/ key /* */ from src limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -238 -PREHOOK: query: /* - -*/ -select /* -*/ key from src limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: /* - -*/ -select /* -*/ key from src limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -238 -PREHOOK: query: select /*+ MAPJOIN(a) */ count(*) from src 
a join src b on a.key = b.key where a.key > 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -1019 -PREHOOK: query: explain extended select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain extended select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` -FROM (SELECT `key` -FROM `default`.`src` -WHERE `key` > 0) AS `t0` -INNER JOIN (SELECT `key` -FROM `default`.`src` -WHERE `key` > 0) AS `t2` ON `t0`.`key` = `t2`.`key` -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (UDFToDouble(key) > 0.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (UDFToDouble(key) > 0.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 166 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 
input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - 
numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - filterExpr: (UDFToDouble(key) > 0.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (UDFToDouble(key) > 0.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [b] - Reducer 2 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/complex_alias.q.out b/ql/src/test/results/clientpositive/llap/complex_alias.q.out deleted file mode 100644 index 51664b2316..0000000000 --- a/ql/src/test/results/clientpositive/llap/complex_alias.q.out +++ /dev/null @@ -1,272 +0,0 @@ -PREHOOK: query: CREATE TABLE agg1 (col0 INT, col1 STRING, col2 DOUBLE) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@agg1 -POSTHOOK: query: CREATE TABLE agg1 (col0 INT, col1 STRING, col2 DOUBLE) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@agg1 -PREHOOK: query: INSERT INTO TABLE agg1 select key,value,key from src tablesample (1 rows) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@agg1 -POSTHOOK: query: INSERT INTO TABLE agg1 select key,value,key from src tablesample (1 rows) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@agg1 -POSTHOOK: Lineage: agg1.col0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: agg1.col1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: agg1.col2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product -PREHOOK: query: EXPLAIN -SELECT single_use_subq11.a1 AS a1, - single_use_subq11.a2 AS a2 -FROM (SELECT Sum(agg1.col2) AS a1 - FROM agg1 - GROUP BY agg1.col0) single_use_subq12 - JOIN (SELECT alias.a2 AS a0, - alias.a1 AS a1, - alias.a1 AS a2 - FROM (SELECT agg1.col1 AS a0, - '42' AS a1, - agg1.col0 AS a2 - FROM agg1 - UNION ALL - SELECT agg1.col1 AS a0, - '41' AS a1, - agg1.col0 AS a2 - FROM agg1) alias - 
GROUP BY alias.a2, - alias.a1) single_use_subq11 - ON ( single_use_subq11.a0 = single_use_subq11.a0 ) -PREHOOK: type: QUERY -PREHOOK: Input: default@agg1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT single_use_subq11.a1 AS a1, - single_use_subq11.a2 AS a2 -FROM (SELECT Sum(agg1.col2) AS a1 - FROM agg1 - GROUP BY agg1.col0) single_use_subq12 - JOIN (SELECT alias.a2 AS a0, - alias.a1 AS a1, - alias.a1 AS a2 - FROM (SELECT agg1.col1 AS a0, - '42' AS a1, - agg1.col0 AS a2 - FROM agg1 - UNION ALL - SELECT agg1.col1 AS a0, - '41' AS a1, - agg1.col0 AS a2 - FROM agg1) alias - GROUP BY alias.a2, - alias.a1) single_use_subq11 - ON ( single_use_subq11.a0 = single_use_subq11.a0 ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@agg1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 4 <- Union 5 (CONTAINS) - Map 7 <- Union 5 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 6 <- Union 5 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: agg1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: col0 (type: int) - outputColumnNames: col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: col0 (type: int) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 
- Map Operator Tree: - TableScan - alias: agg1 - filterExpr: col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: '42' (type: string), col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: int) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: agg1 - filterExpr: col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: '41' (type: string), col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: int) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: 
string), _col1 (type: int) - Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - 
Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Union 5 - Vertex: Union 5 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product -PREHOOK: query: SELECT single_use_subq11.a1 AS a1, - single_use_subq11.a2 AS a2 -FROM (SELECT Sum(agg1.col2) AS a1 - FROM agg1 - GROUP BY agg1.col0) single_use_subq12 - JOIN (SELECT alias.a2 AS a0, - alias.a1 AS a1, - alias.a1 AS a2 - FROM (SELECT agg1.col1 AS a0, - '42' AS a1, - agg1.col0 AS a2 - FROM agg1 - UNION ALL - SELECT agg1.col1 AS a0, - '41' AS a1, - agg1.col0 AS a2 - FROM agg1) alias - GROUP BY alias.a2, - alias.a1) single_use_subq11 - ON ( single_use_subq11.a0 = single_use_subq11.a0 ) -PREHOOK: type: QUERY -PREHOOK: Input: default@agg1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT single_use_subq11.a1 AS a1, - single_use_subq11.a2 AS a2 -FROM (SELECT Sum(agg1.col2) AS a1 - FROM agg1 - GROUP BY agg1.col0) single_use_subq12 - JOIN (SELECT alias.a2 AS a0, - alias.a1 AS a1, - alias.a1 AS a2 - FROM (SELECT agg1.col1 AS a0, - '42' AS a1, - agg1.col0 AS a2 - FROM agg1 - UNION ALL - SELECT agg1.col1 AS a0, - '41' AS a1, - agg1.col0 AS a2 - FROM agg1) alias - GROUP BY alias.a2, - alias.a1) single_use_subq11 - ON ( single_use_subq11.a0 = single_use_subq11.a0 ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@agg1 -#### A masked pattern was here #### -42 42 -41 41 diff --git a/ql/src/test/results/clientpositive/llap/constantPropagateForSubQuery.q.out b/ql/src/test/results/clientpositive/llap/constantPropagateForSubQuery.q.out deleted file mode 100644 index 5fac885d59..0000000000 --- a/ql/src/test/results/clientpositive/llap/constantPropagateForSubQuery.q.out +++ /dev/null @@ -1,287 +0,0 @@ -Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product 
-PREHOOK: query: explain extended - select * from (select a.key as ak, a.value as av, b.key as bk, b.value as bv from src a join src1 b where a.key = '429' ) c -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain extended - select * from (select a.key as ak, a.value as av, b.key as bk, b.value as bv from src a join src1 b where a.key = '429' ) c -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -OPTIMIZED SQL: SELECT CAST('429' AS STRING) AS `ak`, `t0`.`value` AS `av`, `t1`.`key` AS `bk`, `t1`.`value` AS `bv` -FROM (SELECT `value` -FROM `default`.`src` -WHERE `key` = '429') AS `t0`, -(SELECT `key`, `value` -FROM `default`.`src1`) AS `t1` -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key = '429') (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '429') (type: boolean) - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col0 (type: string) - auto parallelism: false - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern 
was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column 
stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - value expressions: _col0 (type: string), _col1 (type: string) - auto parallelism: false - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - 
name: default.src1 - name: default.src1 - Truncated Path -> Alias: - /src1 [b] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Position of Big Table: 0 - Statistics: Num rows: 50 Data size: 13300 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: '429' (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 50 Data size: 17650 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 50 Data size: 17650 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: select * from (select a.key as ak, a.value as av, b.key as bk, b.value as bv from src a join src1 b where a.key = '429' ) c -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select * from (select a.key as ak, a.value as av, b.key as bk, b.value 
as bv from src a join src1 b where a.key = '429' ) c -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -429 val_429 -429 val_429 -429 val_429 -429 val_429 -429 val_429 -429 val_429 -429 val_429 -429 val_429 -429 val_429 val_165 -429 val_429 val_165 -429 val_429 val_193 -429 val_429 val_193 -429 val_429 val_265 -429 val_429 val_265 -429 val_429 val_27 -429 val_429 val_27 -429 val_429 val_409 -429 val_429 val_409 -429 val_429 val_484 -429 val_429 val_484 -429 val_429 128 -429 val_429 128 -429 val_429 146 val_146 -429 val_429 146 val_146 -429 val_429 150 val_150 -429 val_429 150 val_150 -429 val_429 213 val_213 -429 val_429 213 val_213 -429 val_429 224 -429 val_429 224 -429 val_429 238 val_238 -429 val_429 238 val_238 -429 val_429 255 val_255 -429 val_429 255 val_255 -429 val_429 273 val_273 -429 val_429 273 val_273 -429 val_429 278 val_278 -429 val_429 278 val_278 -429 val_429 311 val_311 -429 val_429 311 val_311 -429 val_429 369 -429 val_429 369 -429 val_429 401 val_401 -429 val_429 401 val_401 -429 val_429 406 val_406 -429 val_429 406 val_406 -429 val_429 66 val_66 -429 val_429 66 val_66 -429 val_429 98 val_98 -429 val_429 98 val_98 diff --git a/ql/src/test/results/clientpositive/llap/constant_prop_1.q.out b/ql/src/test/results/clientpositive/llap/constant_prop_1.q.out deleted file mode 100644 index df58572b17..0000000000 --- a/ql/src/test/results/clientpositive/llap/constant_prop_1.q.out +++ /dev/null @@ -1,647 +0,0 @@ -PREHOOK: query: explain -select 1 as a from src -union all -select 1 as a from src limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select 1 as a from src -union all -select 1 as a from src limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - 
Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - 
Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: 1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[19][tables = [sub, b]] in Stage 'Reducer 4' is a cross product -PREHOOK: query: explain -select a, key, value from -( -select 1 as a from src -union all -select 1 as a from src limit 1 -)sub join src b where value='12345' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select a, key, value from -( -select 1 as a from src -union all -select 1 as a from src limit 1 -)sub join src b where value='12345' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 5 <- Union 2 (CONTAINS) - Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 6 (XPROD_EDGE), Reducer 3 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: b - filterExpr: (value = '12345') (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (value = '12345') (type: boolean) - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 1 (type: int), _col1 (type: string), '12345' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE - table: - input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select 1 as a from src -union all -select 2 as a from src limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select 1 as a from src -union all -select 2 as a from src limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 2 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data 
size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: no inputs - Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: 1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[19][tables = [sub, b]] in Stage 'Reducer 4' is a cross product -PREHOOK: query: explain -select a, key, value from -( -select 1 as a from src -union all -select 2 as a from src limit 1 -)sub join src b where value='12345' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select a, key, value from -( -select 1 as a from src -union all -select 2 as a from src limit 1 -)sub join src b where value='12345' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Union 2 (CONTAINS) - Map 5 <- Union 2 (CONTAINS) - Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 6 (XPROD_EDGE), Reducer 3 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 
Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 2 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: b - filterExpr: (value = '12345') (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (value = '12345') (type: boolean) - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: 
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), '12345' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[10][tables = [a, b]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: explain -select a.key, b.value from src a join src b where a.key = '238' and b.value = '234' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select a.key, b.value from src a join src b where a.key = '238' and b.value = '234' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key = '238') (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - 
Filter Operator - predicate: (key = '238') (type: boolean) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - filterExpr: (value = '234') (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (value = '234') (type: boolean) - Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: '238' (type: string), '234' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select a.key, b.value from src a join src b on a.key=b.key where b.value = '234' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select a.key, b.value from src a join src b on a.key=b.key 
where b.value = '234' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - filterExpr: (key is not null and (value = '234')) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and (value = '234')) (type: boolean) - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - Select 
Operator - expressions: _col0 (type: string), '234' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 522 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 522 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: create table t_n26 ( -a int, -b int, -c int, -d int, -e int -) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t_n26 -POSTHOOK: query: create table t_n26 ( -a int, -b int, -c int, -d int, -e int -) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t_n26 -PREHOOK: query: explain -select a2 as a3 from -(select a1 as a2, c1 as c2 from -(select a as a1, b as b1, c as c1 from t_n26 where a=1 and b=2 and c=3)sub1)sub2 -PREHOOK: type: QUERY -PREHOOK: Input: default@t_n26 -#### A masked pattern was here #### -POSTHOOK: query: explain -select a2 as a3 from -(select a1 as a2, c1 as c2 from -(select a as a1, b as b1, c as c1 from t_n26 where a=1 and b=2 and c=3)sub1)sub2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t_n26 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: t_n26 - filterExpr: ((a = 1) and (b = 2) and (c = 3)) (type: boolean) - Filter Operator - predicate: ((a = 1) and (b = 2) and (c = 3)) (type: boolean) - Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/constant_prop_2.q.out b/ql/src/test/results/clientpositive/llap/constant_prop_2.q.out deleted 
file mode 100644 index aaf7d89d86..0000000000 --- a/ql/src/test/results/clientpositive/llap/constant_prop_2.q.out +++ /dev/null @@ -1,110 +0,0 @@ -PREHOOK: query: explain select count('1') from src group by '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select count('1') from src group by '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 1 - Processor Tree: - ListSink - -PREHOOK: query: select count('1') from src group by '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select count('1') from src group by '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -500 -PREHOOK: query: explain -analyze table srcpart partition (ds='2008-04-08',hr=11) compute statistics for columns key, value -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Output: default@srcpart -PREHOOK: Output: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: explain -analyze table srcpart partition (ds='2008-04-08',hr=11) compute statistics for columns key, value -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@srcpart -POSTHOOK: Output: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: ((ds = '2008-04-08') and (hr = '11')) (type: 
boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: '2008-04-08' (type: string), '11' (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: '2008-04-08' (type: string), '11' (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: '2008-04-08' (type: string), '11' (type: string) - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col2 (type: struct), _col3 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: '2008-04-08' (type: string), '11' (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: 
Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.srcpart - diff --git a/ql/src/test/results/clientpositive/llap/constant_prop_3.q.out b/ql/src/test/results/clientpositive/llap/constant_prop_3.q.out deleted file mode 100644 index a8a23dd3ed..0000000000 --- a/ql/src/test/results/clientpositive/llap/constant_prop_3.q.out +++ /dev/null @@ -1,418 +0,0 @@ -PREHOOK: query: drop table part_hive -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table part_hive -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table partsupp_hive -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table partsupp_hive -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table supplier_hive -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table supplier_hive -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table part_hive (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, -P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@part_hive -POSTHOOK: query: create table part_hive (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, -P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@part_hive -PREHOOK: query: create table partsupp_hive (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, -PS_COMMENT STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@partsupp_hive -POSTHOOK: query: create table partsupp_hive (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, -PS_COMMENT STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@partsupp_hive -PREHOOK: query: create table supplier_hive (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, 
S_NATIONKEY INT, -S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@supplier_hive -POSTHOOK: query: create table supplier_hive (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, -S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@supplier_hive -PREHOOK: query: analyze table part_hive compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@part_hive -PREHOOK: Output: default@part_hive -POSTHOOK: query: analyze table part_hive compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part_hive -POSTHOOK: Output: default@part_hive -PREHOOK: query: analyze table part_hive compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@part_hive -PREHOOK: Output: default@part_hive -#### A masked pattern was here #### -POSTHOOK: query: analyze table part_hive compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@part_hive -POSTHOOK: Output: default@part_hive -#### A masked pattern was here #### -PREHOOK: query: analyze table partsupp_hive compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@partsupp_hive -PREHOOK: Output: default@partsupp_hive -POSTHOOK: query: analyze table partsupp_hive compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@partsupp_hive -POSTHOOK: Output: default@partsupp_hive -PREHOOK: query: analyze table partsupp_hive compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@partsupp_hive -PREHOOK: Output: default@partsupp_hive -#### A masked pattern was here #### -POSTHOOK: query: analyze table partsupp_hive compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@partsupp_hive -POSTHOOK: Output: default@partsupp_hive -#### A masked pattern was here #### -PREHOOK: query: analyze table 
supplier_hive compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@supplier_hive -PREHOOK: Output: default@supplier_hive -POSTHOOK: query: analyze table supplier_hive compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@supplier_hive -POSTHOOK: Output: default@supplier_hive -PREHOOK: query: analyze table supplier_hive compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@supplier_hive -PREHOOK: Output: default@supplier_hive -#### A masked pattern was here #### -POSTHOOK: query: analyze table supplier_hive compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@supplier_hive -POSTHOOK: Output: default@supplier_hive -#### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[69][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: explain select - p_brand, - p_type, - p_size, - count(distinct ps_suppkey) as supplier_cnt -from - partsupp_hive, - part_hive -where - p_partkey = ps_partkey - and p_brand <> 'Brand#34' - and p_type not like 'ECONOMY BRUSHED%' - and p_size in (22, 14, 27, 49, 21, 33, 35, 28) - and partsupp_hive.ps_suppkey not in ( - select - s_suppkey - from - supplier_hive - where - s_comment like '%Customer%Complaints%' - ) -group by - p_brand, - p_type, - p_size -order by - supplier_cnt desc, - p_brand, - p_type, - p_size -PREHOOK: type: QUERY -PREHOOK: Input: default@part_hive -PREHOOK: Input: default@partsupp_hive -PREHOOK: Input: default@supplier_hive -#### A masked pattern was here #### -POSTHOOK: query: explain select - p_brand, - p_type, - p_size, - count(distinct ps_suppkey) as supplier_cnt -from - partsupp_hive, - part_hive -where - p_partkey = ps_partkey - and p_brand <> 'Brand#34' - and p_type not like 'ECONOMY BRUSHED%' - and p_size in (22, 14, 27, 49, 21, 33, 35, 28) - and partsupp_hive.ps_suppkey not in ( - select - s_suppkey - from - supplier_hive - where - s_comment like '%Customer%Complaints%' - ) -group 
by - p_brand, - p_type, - p_size -order by - supplier_cnt desc, - p_brand, - p_type, - p_size -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part_hive -POSTHOOK: Input: default@partsupp_hive -POSTHOOK: Input: default@supplier_hive -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 6 (SIMPLE_EDGE) - Reducer 9 <- Map 10 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part_hive - filterExpr: ((p_size) IN (22, 14, 27, 49, 21, 33, 35, 28) and (p_brand <> 'Brand#34') and p_partkey is not null and (not (p_type like 'ECONOMY BRUSHED%'))) (type: boolean) - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((p_size) IN (22, 14, 27, 49, 21, 33, 35, 28) and (p_brand <> 'Brand#34') and p_partkey is not null and (not (p_type like 'ECONOMY BRUSHED%'))) (type: boolean) - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 10 - Map 
Operator Tree: - TableScan - alias: partsupp_hive - filterExpr: ps_partkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ps_partkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ps_partkey (type: int), ps_suppkey (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: supplier_hive - filterExpr: ((s_comment like '%Customer%Complaints%') or ((s_comment like '%Customer%Complaints%') and s_suppkey is not null)) (type: boolean) - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (s_comment like '%Customer%Complaints%') (type: boolean) - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_suppkey (type: int) - outputColumnNames: s_suppkey - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(), count(s_suppkey) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Filter Operator - predicate: ((s_comment like 
'%Customer%Complaints%') and s_suppkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_suppkey (type: int) - outputColumnNames: s_suppkey - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: s_suppkey (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7, _col9 - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col7 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col9 (type: boolean) - outputColumnNames: _col1, 
_col3, _col4, _col5, _col6, _col7, _col9 - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((_col6 = 0L) or (_col9 is null and (_col7 >= _col6) and _col1 is not null)) (type: boolean) - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col1 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int) - null sort order: zzzz - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col0) - keys: _col1 (type: string), _col2 (type: string), _col3 (type: int) - mode: complete - outputColumnNames: _col0, _col1, _col2, 
_col3 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int) - null sort order: zzzz - sort order: -+++ - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 8 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - 
Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col3 (type: boolean) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/constant_prop_timestamp_date_cast.q.out b/ql/src/test/results/clientpositive/llap/constant_prop_timestamp_date_cast.q.out deleted file mode 100644 index 765d903f85..0000000000 --- a/ql/src/test/results/clientpositive/llap/constant_prop_timestamp_date_cast.q.out +++ /dev/null @@ -1,87 +0,0 @@ -PREHOOK: query: CREATE TABLE constant_prop(start_dt timestamp, stop_dt timestamp) STORED AS ORC TBLPROPERTIES ('transactional'='true') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@constant_prop -POSTHOOK: query: CREATE TABLE constant_prop(start_dt timestamp, stop_dt timestamp) STORED AS ORC TBLPROPERTIES ('transactional'='true') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@constant_prop -PREHOOK: query: explain UPDATE constant_prop SET stop_dt = CURRENT_TIMESTAMP WHERE CAST(start_dt AS DATE) = CURRENT_DATE -PREHOOK: type: QUERY -PREHOOK: Input: default@constant_prop -PREHOOK: Output: 
default@constant_prop -POSTHOOK: query: explain UPDATE constant_prop SET stop_dt = CURRENT_TIMESTAMP WHERE CAST(start_dt AS DATE) = CURRENT_DATE -POSTHOOK: type: QUERY -POSTHOOK: Input: default@constant_prop -POSTHOOK: Output: default@constant_prop -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: constant_prop - filterExpr: (CAST( start_dt AS DATE) = DATE'2020-03-05') (type: boolean) - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (CAST( start_dt AS DATE) = DATE'2020-03-05') (type: boolean) - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ROW__ID (type: struct), start_dt (type: timestamp) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - null sort order: z - sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: timestamp) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: timestamp), TIMESTAMP'2020-03-05 14:16:57' (type: timestamp) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - 
table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.constant_prop - Write Type: UPDATE - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: false - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.constant_prop - Write Type: UPDATE - - Stage: Stage-3 - Stats Work - Basic Stats Work: - diff --git a/ql/src/test/results/clientpositive/llap/constprog2.q.out b/ql/src/test/results/clientpositive/llap/constprog2.q.out deleted file mode 100644 index 2470d5a6d5..0000000000 --- a/ql/src/test/results/clientpositive/llap/constprog2.q.out +++ /dev/null @@ -1,182 +0,0 @@ -Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN -SELECT src1.key, src1.key + 1, src2.value - FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND src1.key = 86 -PREHOOK: type: QUERY -PREHOOK: Input: default@srcbucket -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT src1.key, src1.key + 1, src2.value - FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND src1.key = 86 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcbucket -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (key = 86) (type: boolean) - Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - 
predicate: (key = 86) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: (key = 86) (type: boolean) - Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = 86) (type: boolean) - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 86 (type: int), 87 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - 
limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN -SELECT src1.key, src1.key + 1, src2.value - FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND cast(src1.key as double) = 86 -PREHOOK: type: QUERY -PREHOOK: Input: default@srcbucket -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT src1.key, src1.key + 1, src2.value - FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND cast(src1.key as double) = 86 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcbucket -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key) = 86.0D) (type: boolean) - Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) = 86.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: (key = 86) (type: boolean) - Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = 86) (type: boolean) - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: 
string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 86 (type: int), 87 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/constprog3.q.out b/ql/src/test/results/clientpositive/llap/constprog3.q.out deleted file mode 100644 index f46ba83d80..0000000000 --- a/ql/src/test/results/clientpositive/llap/constprog3.q.out +++ /dev/null @@ -1,106 +0,0 @@ -PREHOOK: query: create temporary table table1(id int, val int, val1 int, dimid int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@table1 -POSTHOOK: query: create temporary table table1(id int, val int, val1 int, dimid int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@table1 -PREHOOK: query: create temporary table table3(id int, val int, val1 int) -PREHOOK: type: 
CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@table3 -POSTHOOK: query: create temporary table table3(id int, val int, val1 int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@table3 -Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: explain -select table1.id, table1.val, table1.val1 -from table1 inner join table3 -on table1.dimid = table3.id and table3.id = 1 where table1.dimid <> 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@table1 -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: explain -select table1.id, table1.val, table1.val1 -from table1 inner join table3 -on table1.dimid = table3.id and table3.id = 1 where table1.dimid <> 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table1 -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: table1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: id (type: int), val (type: int), val1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 0 - Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: table3 - 
filterExpr: (id = 1) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (id = 1) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/constprog_dp.q.out b/ql/src/test/results/clientpositive/llap/constprog_dp.q.out deleted file mode 100644 index 1eb1001fcb..0000000000 --- a/ql/src/test/results/clientpositive/llap/constprog_dp.q.out +++ /dev/null @@ -1,135 +0,0 @@ -PREHOOK: query: create table dest_n1(key string, value string) partitioned by (ds string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest_n1 -POSTHOOK: query: create table dest_n1(key string, value string) partitioned by (ds string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest_n1 -PREHOOK: query: EXPLAIN -from srcpart -insert overwrite table dest_n1 partition (ds) select key, value, ds where ds='2008-04-08' 
-PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@dest_n1 -POSTHOOK: query: EXPLAIN -from srcpart -insert overwrite table dest_n1 partition (ds) select key, value, ds where ds='2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: (ds = '2008-04-08') (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 272000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 272000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_n1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, ds - Statistics: Num rows: 1000 Data size: 272000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string) - minReductionHashAggr: 0.99 - mode: hash 
- outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 974 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - ds - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_n1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.dest_n1 - -PREHOOK: query: from srcpart -insert overwrite table dest_n1 partition (ds) select key, value, ds where ds='2008-04-08' 
-PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@dest_n1 -POSTHOOK: query: from srcpart -insert overwrite table dest_n1 partition (ds) select key, value, ds where ds='2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@dest_n1@ds=2008-04-08 -POSTHOOK: Lineage: dest_n1 PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_n1 PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] diff --git a/ql/src/test/results/clientpositive/llap/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/llap/constprog_partitioner.q.out deleted file mode 100644 index 9305e705b6..0000000000 --- a/ql/src/test/results/clientpositive/llap/constprog_partitioner.q.out +++ /dev/null @@ -1,197 +0,0 @@ -Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN -SELECT src1.key, src1.key + 1, src2.value - FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND src1.key = 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@srcbucket -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT src1.key, src1.key + 1, src2.value - FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND src1.key = 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcbucket -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 
1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (key = 100) (type: boolean) - Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = 100) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: (key = 100) (type: boolean) - Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = 100) (type: boolean) - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 100 (type: int), 101 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: EXPLAIN -SELECT l_partkey, l_suppkey -FROM lineitem li -WHERE li.l_linenumber = 1 AND - li.l_orderkey IN (SELECT l_orderkey FROM lineitem WHERE l_shipmode = 'AIR' AND l_linenumber = li.l_linenumber) -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT l_partkey, l_suppkey -FROM lineitem li -WHERE li.l_linenumber = 1 AND - li.l_orderkey IN (SELECT l_orderkey FROM lineitem WHERE l_shipmode = 'AIR' AND l_linenumber = li.l_linenumber) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: li - filterExpr: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), 1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), 1 (type: int) - Statistics: Num rows: 14 
Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: lineitem - filterExpr: ((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 100 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: l_orderkey (type: int), 1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int), 1 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/correlated_join_keys.q.out b/ql/src/test/results/clientpositive/llap/correlated_join_keys.q.out deleted file mode 100644 index c924eb0246..0000000000 --- a/ql/src/test/results/clientpositive/llap/correlated_join_keys.q.out +++ /dev/null @@ -1,290 +0,0 @@ -PREHOOK: query: drop table customer_address_n0 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table customer_address_n0 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table customer_address_n0 -( - ca_address_sk int, - ca_address_id string, - ca_street_number string, - ca_street_name string, - ca_street_type string, - ca_suite_number string, - ca_city string, - ca_county string, - ca_state string, - ca_zip string, - ca_country string, - ca_gmt_offset float, - ca_location_type string -) -row format delimited fields terminated by '|' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@customer_address_n0 -POSTHOOK: query: create table customer_address_n0 -( - ca_address_sk int, - ca_address_id string, - ca_street_number string, - ca_street_name string, - ca_street_type string, - ca_suite_number string, - ca_city string, - ca_county string, - ca_state string, - ca_zip string, - ca_country string, - ca_gmt_offset float, - ca_location_type string -) -row format delimited fields terminated by '|' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@customer_address_n0 -PREHOOK: query: load data local inpath '../../data/files/customer_address.txt' overwrite into table customer_address_n0 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@customer_address_n0 
-POSTHOOK: query: load data local inpath '../../data/files/customer_address.txt' overwrite into table customer_address_n0 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@customer_address_n0 -PREHOOK: query: analyze table customer_address_n0 compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@customer_address_n0 -PREHOOK: Output: default@customer_address_n0 -POSTHOOK: query: analyze table customer_address_n0 compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@customer_address_n0 -POSTHOOK: Output: default@customer_address_n0 -PREHOOK: query: analyze table customer_address_n0 compute statistics for columns ca_state, ca_zip -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@customer_address_n0 -PREHOOK: Output: default@customer_address_n0 -#### A masked pattern was here #### -POSTHOOK: query: analyze table customer_address_n0 compute statistics for columns ca_state, ca_zip -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@customer_address_n0 -POSTHOOK: Output: default@customer_address_n0 -#### A masked pattern was here #### -PREHOOK: query: explain select count(*) from customer_address_n0 a join customer_address_n0 b on (a.ca_zip = b.ca_zip and a.ca_state = b.ca_state) -PREHOOK: type: QUERY -PREHOOK: Input: default@customer_address_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select count(*) from customer_address_n0 a join customer_address_n0 b on (a.ca_zip = b.ca_zip and a.ca_state = b.ca_state) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@customer_address_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - 
filterExpr: (ca_zip is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ca_zip is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ca_state (type: string), ca_zip (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: (ca_zip is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ca_zip is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ca_state (type: string), ca_zip (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: 
string) - 1 _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.8 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select count(*) from customer_address_n0 a join customer_address_n0 b on (a.ca_zip = b.ca_zip and a.ca_state = b.ca_state) -PREHOOK: type: QUERY -PREHOOK: Input: default@customer_address_n0 -#### A masked pattern was here #### -POSTHOOK: query: explain select count(*) from customer_address_n0 a join customer_address_n0 b on (a.ca_zip = b.ca_zip and a.ca_state = b.ca_state) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@customer_address_n0 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was 
here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (ca_zip is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ca_zip is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ca_state (type: string), ca_zip (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: (ca_zip is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ca_zip is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ca_state (type: string), ca_zip (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - 
condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.95 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: drop table customer_address_n0 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@customer_address_n0 -PREHOOK: Output: default@customer_address_n0 -POSTHOOK: query: drop table customer_address_n0 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@customer_address_n0 -POSTHOOK: Output: default@customer_address_n0 diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer10.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer10.q.out deleted file mode 100644 index c1577e73ae..0000000000 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer10.q.out +++ /dev/null @@ -1,1166 +0,0 @@ -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt -FROM -(SELECT x.key as 
key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -LEFT SEMI JOIN src yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -LEFT SEMI JOIN src yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: 
Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: yy - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.5897436 - mode: hash - outputColumnNames: _col0, 
_col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.cnt -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -LEFT SEMI JOIN src yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### 
-POSTHOOK: query: SELECT xx.key, xx.cnt -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -LEFT SEMI JOIN src yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 1 -146 1 -150 1 -213 1 -224 1 -238 1 -255 1 -273 1 -278 1 -311 1 -369 1 -401 1 -406 1 -66 1 -98 1 -XifREjIWNTdZii76gCxhIQ== -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -LEFT SEMI JOIN src yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -LEFT SEMI JOIN src yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 
(type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: yy - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column 
stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.5897436 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.cnt -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -LEFT SEMI JOIN src yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.cnt -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -LEFT SEMI JOIN src yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 1 -146 1 -150 1 -213 1 -224 1 -238 1 -255 1 -273 1 -278 1 -311 1 -369 1 -401 1 -406 1 -66 1 -98 1 -XifREjIWNTdZii76gCxhIQ== -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value -FROM -src1 xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND - y.key > 20) yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value -FROM -src1 xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND - y.key > 20) yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: xx - filterExpr: 
((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: x - filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: y - filterExpr: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 200.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE 
Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 200.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.5090909 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num 
rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.value -FROM -src1 xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND - y.key > 20) yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.value -FROM -src1 xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND - y.key > 20) yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 -146 val_146 -150 val_150 -66 val_66 -98 val_98 -yXzkFzMwxxoH+6e+nKoA8A== -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value -FROM -src1 xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND - y.key > 20) yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value -FROM -src1 xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND - y.key > 20) yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: xx - filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) - 
Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: x - filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 20.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: y - filterExpr: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 200.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) > 20.0D) 
and (UDFToDouble(key) < 200.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.5090909 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE - - Stage: 
Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.value -FROM -src1 xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND - y.key > 20) yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.value -FROM -src1 xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND - y.key > 20) yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 -146 val_146 -150 val_150 -66 val_66 -98 val_98 -yXzkFzMwxxoH+6e+nKoA8A== -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value -FROM -src xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND x.key > 180) yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value -FROM -src xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND x.key > 180) yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: xx - filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) < 200.0D) and 
(UDFToDouble(key) > 180.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: x - filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: y - filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - 
Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.5090909 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.value -FROM -src xx -LEFT SEMI 
JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND x.key > 180) yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.value -FROM -src xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND x.key > 180) yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -181 val_181 -183 val_183 -186 val_186 -187 val_187 -187 val_187 -187 val_187 -189 val_189 -190 val_190 -191 val_191 -191 val_191 -192 val_192 -193 val_193 -193 val_193 -193 val_193 -194 val_194 -195 val_195 -195 val_195 -196 val_196 -197 val_197 -197 val_197 -199 val_199 -199 val_199 -199 val_199 -JoqlU/XtLjyBlZ8xMKANxg== -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value -FROM -src xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND x.key > 180) yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value -FROM -src xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND x.key > 180) yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: xx - filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - 
predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: x - filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: y - filterExpr: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(key) < 200.0D) and (UDFToDouble(key) > 180.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 4785 
Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.5090909 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 27 Data size: 2349 Basic stats: COMPLETE Column stats: COMPLETE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: 
SELECT xx.key, xx.value -FROM -src xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND x.key > 180) yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.value -FROM -src xx -LEFT SEMI JOIN -(SELECT x.key as key - FROM src x JOIN src y ON (x.key = y.key) - WHERE x.key < 200 AND x.key > 180) yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -181 val_181 -183 val_183 -186 val_186 -187 val_187 -187 val_187 -187 val_187 -189 val_189 -190 val_190 -191 val_191 -191 val_191 -192 val_192 -193 val_193 -193 val_193 -193 val_193 -194 val_194 -195 val_195 -195 val_195 -196 val_196 -197 val_197 -197 val_197 -199 val_199 -199 val_199 -199 val_199 -JoqlU/XtLjyBlZ8xMKANxg== diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer11.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer11.q.out deleted file mode 100644 index 1105061882..0000000000 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer11.q.out +++ /dev/null @@ -1,642 +0,0 @@ -PREHOOK: query: CREATE TABLE part_table_n1(key string, value string) PARTITIONED BY (partitionId int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@part_table_n1 -POSTHOOK: query: CREATE TABLE part_table_n1(key string, value string) PARTITIONED BY (partitionId int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@part_table_n1 -PREHOOK: query: INSERT OVERWRITE TABLE part_table_n1 PARTITION (partitionId=1) - SELECT key, value FROM src ORDER BY key, value LIMIT 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@part_table_n1@partitionid=1 -POSTHOOK: query: INSERT OVERWRITE TABLE part_table_n1 PARTITION (partitionId=1) - SELECT key, value FROM src ORDER BY key, value LIMIT 100 -POSTHOOK: 
type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@part_table_n1@partitionid=1 -POSTHOOK: Lineage: part_table_n1 PARTITION(partitionid=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: part_table_n1 PARTITION(partitionid=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT OVERWRITE TABLE part_table_n1 PARTITION (partitionId=2) - SELECT key, value FROM src1 ORDER BY key, value -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -PREHOOK: Output: default@part_table_n1@partitionid=2 -POSTHOOK: query: INSERT OVERWRITE TABLE part_table_n1 PARTITION (partitionId=2) - SELECT key, value FROM src1 ORDER BY key, value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -POSTHOOK: Output: default@part_table_n1@partitionid=2 -POSTHOOK: Lineage: part_table_n1 PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: part_table_n1 PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN -SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 1 AND - y.partitionId = 2 -GROUP BY x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@part_table_n1 -PREHOOK: Input: default@part_table_n1@partitionid=1 -PREHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 1 AND - y.partitionId = 2 -GROUP BY x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part_table_n1 -POSTHOOK: Input: default@part_table_n1@partitionid=1 -POSTHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: 
Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 
1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.825 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 665 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 665 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 665 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 665 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 1 AND - y.partitionId = 2 -GROUP BY x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@part_table_n1 -PREHOOK: Input: default@part_table_n1@partitionid=1 -PREHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 1 AND - y.partitionId = 2 -GROUP BY x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: 
default@part_table_n1 -POSTHOOK: Input: default@part_table_n1@partitionid=1 -POSTHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -146 2 -128 3 -150 1 -PREHOOK: query: EXPLAIN -SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 1 AND - y.partitionId = 2 -GROUP BY x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@part_table_n1 -PREHOOK: Input: default@part_table_n1@partitionid=1 -PREHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 1 AND - y.partitionId = 2 -GROUP BY x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part_table_n1 -POSTHOOK: Input: default@part_table_n1@partitionid=1 -POSTHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 8700 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 100 Data size: 8700 
Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 40 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.825 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 665 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 665 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 665 Basic stats: COMPLETE Column 
stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 665 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 1 AND - y.partitionId = 2 -GROUP BY x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@part_table_n1 -PREHOOK: Input: default@part_table_n1@partitionid=1 -PREHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 1 AND - y.partitionId = 2 -GROUP BY x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part_table_n1 -POSTHOOK: Input: default@part_table_n1@partitionid=1 -POSTHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -146 2 -128 3 -150 1 -PREHOOK: query: EXPLAIN -SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 2 AND - y.partitionId = 2 -GROUP BY x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@part_table_n1 -PREHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 2 AND - y.partitionId = 2 -GROUP BY x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part_table_n1 -POSTHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 
- -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 
(type: string) - outputColumnNames: _col0 - Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.5897436 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 2 AND - y.partitionId = 2 -GROUP BY x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@part_table_n1 -PREHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 2 AND - y.partitionId = 2 -GROUP BY x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part_table_n1 -POSTHOOK: Input: 
default@part_table_n1@partitionid=2 -#### A masked pattern was here #### - 100 -146 1 -213 1 -273 1 -311 1 -369 1 -406 1 -66 1 -98 1 -128 1 -150 1 -224 1 -238 1 -255 1 -278 1 -401 1 -PREHOOK: query: EXPLAIN -SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 2 AND - y.partitionId = 2 -GROUP BY x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@part_table_n1 -PREHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 2 AND - y.partitionId = 2 -GROUP BY x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part_table_n1 -POSTHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map 
Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.5897436 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16 Data size: 1504 
Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 2 AND - y.partitionId = 2 -GROUP BY x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@part_table_n1 -PREHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.key AS key, count(1) AS cnt -FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key) -WHERE x.partitionId = 2 AND - y.partitionId = 2 -GROUP BY x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part_table_n1 -POSTHOOK: Input: default@part_table_n1@partitionid=2 -#### A masked pattern was here #### - 100 -146 1 -213 1 -273 1 -311 1 -369 1 -406 1 -66 1 -98 1 -128 1 -150 1 -224 1 -238 1 -255 1 -278 1 -401 1 diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer12.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer12.q.out deleted file mode 100644 index 05feb9b568..0000000000 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer12.q.out +++ /dev/null @@ -1,150 +0,0 @@ -PREHOOK: query: EXPLAIN SELECT xx.key, xx.cnt, yy.key, yy.cnt -FROM -(SELECT x.key as key, count(x.value) OVER (PARTITION BY x.key) AS cnt FROM src x) xx -JOIN -(SELECT y.key as key, count(y.value) OVER (PARTITION BY y.key) AS cnt FROM src1 y) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN SELECT xx.key, xx.cnt, yy.key, yy.cnt -FROM -(SELECT x.key as key, count(x.value) OVER (PARTITION BY x.key) AS cnt FROM src x) xx -JOIN -(SELECT y.key as key, 
count(y.value) OVER (PARTITION BY y.key) AS cnt FROM src1 y) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string, _col1: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col0 - raw input shape: - window functions: - window function definition - alias: count_window_0 - arguments: _col1 - name: count - window function: GenericUDAFCountEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), count_window_0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2350 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 223000 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string, _col1: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col0 - raw input shape: - window functions: - window function definition - alias: count_window_0 - arguments: _col1 - name: count - window function: GenericUDAFCountEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - Statistics: Num rows: 500 Data size: 223000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), count_window_0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner 
Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 39 Data size: 7371 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 39 Data size: 7371 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer13.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer13.q.out deleted file mode 100644 index b6bfdcaf5d..0000000000 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer13.q.out +++ /dev/null @@ -1,163 +0,0 @@ -PREHOOK: query: CREATE TABLE tmp(c1 INT, c2 INT, c3 STRING, c4 STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tmp -POSTHOOK: query: CREATE TABLE tmp(c1 INT, c2 INT, c3 STRING, c4 STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tmp -PREHOOK: query: INSERT OVERWRITE TABLE tmp -SELECT x.key, y.key, x.value, y.value FROM src x JOIN src y ON (x.key = y.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@tmp -POSTHOOK: query: INSERT OVERWRITE TABLE tmp -SELECT x.key, y.key, x.value, y.value FROM src x JOIN src y ON (x.key = y.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@tmp -POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: 
Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN -SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c3, x.c1) xx -JOIN -(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c3, x1.c1) yy -ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt -PREHOOK: type: QUERY -PREHOOK: Input: default@tmp -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c3, x.c1) xx -JOIN -(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c3, x1.c1) yy -ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmp -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: ((c1 < 120) and c3 is not null) (type: boolean) - Statistics: Num rows: 1028 Data size: 97660 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((c1 < 120) and c3 is not null) (type: boolean) - Statistics: Num rows: 248 Data size: 23560 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: c1 (type: int), c3 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 124 Data size: 
12772 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: x1 - filterExpr: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) - Statistics: Num rows: 1028 Data size: 101772 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) - Statistics: Num rows: 198 Data size: 19602 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: c1 (type: int), c3 (type: string) - outputColumnNames: c1, c3 - Statistics: Num rows: 198 Data size: 19602 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: c1 (type: int), c3 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce 
Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey5 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - 
- Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer14.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer14.q.out deleted file mode 100644 index defd2bdb99..0000000000 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer14.q.out +++ /dev/null @@ -1,1500 +0,0 @@ -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: (key is not null and key BETWEEN DynamicValue(RS_11_y_key_min) AND DynamicValue(RS_11_y_key_max) and in_bloom_filter(key, DynamicValue(RS_11_y_key_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and key BETWEEN DynamicValue(RS_11_y_key_min) AND DynamicValue(RS_11_y_key_max) and 
in_bloom_filter(key, DynamicValue(RS_11_y_key_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num 
rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - - Stage: 
Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x SORT BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y SORT BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x SORT BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y SORT BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE 
Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx -JOIN -(SELECT y.key as key, 
y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) 
- Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx -JOIN -(SELECT y.key as key, 
y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 val_128 128 -128 val_128 128 -128 val_128 128 -146 val_146 146 val_146 -146 val_146 146 val_146 -150 val_150 150 val_150 -213 val_213 213 val_213 -213 val_213 213 val_213 -224 val_224 224 -224 val_224 224 -238 val_238 238 val_238 -238 val_238 238 val_238 -255 val_255 255 val_255 -255 val_255 255 val_255 -273 val_273 273 val_273 -273 val_273 273 val_273 -273 val_273 273 val_273 -278 val_278 278 val_278 -278 val_278 278 val_278 -311 val_311 311 val_311 -311 val_311 311 val_311 -311 val_311 311 val_311 -369 val_369 369 -369 val_369 369 -369 val_369 369 -401 val_401 401 val_401 -401 val_401 401 val_401 -401 val_401 401 val_401 -401 val_401 401 val_401 -401 val_401 401 val_401 -406 val_406 406 val_406 -406 val_406 406 val_406 -406 val_406 406 val_406 -406 val_406 406 val_406 -66 val_66 66 val_66 -98 val_98 98 val_98 -98 val_98 98 val_98 -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx -JOIN -(SELECT 
y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort 
order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key) xx -JOIN -(SELECT y.key as 
key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 val_128 128 -128 val_128 128 -128 val_128 128 -146 val_146 146 val_146 -146 val_146 146 val_146 -150 val_150 150 val_150 -213 val_213 213 val_213 -213 val_213 213 val_213 -224 val_224 224 -224 val_224 224 -238 val_238 238 val_238 -238 val_238 238 val_238 -255 val_255 255 val_255 -255 val_255 255 val_255 -273 val_273 273 val_273 -273 val_273 273 val_273 -273 val_273 273 val_273 -278 val_278 278 val_278 -278 val_278 278 val_278 -311 val_311 311 val_311 -311 val_311 311 val_311 -311 val_311 311 val_311 -369 val_369 369 -369 val_369 369 -369 val_369 369 -401 val_401 401 val_401 -401 val_401 401 val_401 -401 val_401 401 val_401 -401 val_401 401 val_401 -401 val_401 401 val_401 -406 val_406 406 val_406 -406 val_406 406 val_406 -406 val_406 406 val_406 -406 val_406 406 val_406 -66 val_66 66 val_66 -98 val_98 98 val_98 -98 val_98 98 val_98 -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key DESC) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key DESC) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x DISTRIBUTE BY key SORT BY key DESC) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key DESC) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked 
pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: - - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: - - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), 
VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x ORDER BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y ORDER BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x ORDER BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y ORDER BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 
(SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Execution 
mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is 
not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Select 
Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 val_128 128 -128 val_128 128 -128 val_128 128 -146 val_146 146 val_146 -146 val_146 146 val_146 -150 val_150 150 val_150 -213 val_213 213 val_213 -213 val_213 213 val_213 -224 val_224 224 -224 val_224 224 -238 val_238 238 val_238 -238 val_238 238 val_238 -255 val_255 255 val_255 -255 val_255 255 val_255 -273 val_273 273 val_273 -273 val_273 273 val_273 -273 val_273 273 val_273 -278 val_278 278 val_278 
-278 val_278 278 val_278 -311 val_311 311 val_311 -311 val_311 311 val_311 -311 val_311 311 val_311 -369 val_369 369 -369 val_369 369 -369 val_369 369 -401 val_401 401 val_401 -401 val_401 401 val_401 -401 val_401 401 val_401 -401 val_401 401 val_401 -401 val_401 401 val_401 -406 val_406 406 val_406 -406 val_406 406 val_406 -406 val_406 406 val_406 -406 val_406 406 val_406 -66 val_66 66 val_66 -98 val_98 98 val_98 -98 val_98 98 val_98 -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator 
- key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - 
compressed: false - Statistics: Num rows: 39 Data size: 13767 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x Cluster BY key) xx -JOIN -(SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 val_128 128 -128 val_128 128 -128 val_128 128 -146 val_146 146 val_146 -146 val_146 146 val_146 -150 val_150 150 val_150 -213 val_213 213 val_213 -213 val_213 213 val_213 -224 val_224 224 -224 val_224 224 -238 val_238 238 val_238 -238 val_238 238 val_238 -255 val_255 255 val_255 -255 val_255 255 val_255 -273 val_273 273 val_273 -273 val_273 273 val_273 -273 val_273 273 val_273 -278 val_278 278 val_278 -278 val_278 278 val_278 -311 val_311 311 val_311 -311 val_311 311 val_311 -311 val_311 311 val_311 -369 val_369 369 -369 val_369 369 -369 val_369 369 -401 val_401 401 val_401 -401 val_401 401 val_401 -401 val_401 401 val_401 -401 val_401 401 val_401 -401 val_401 401 val_401 -406 val_406 406 val_406 -406 val_406 406 val_406 -406 val_406 406 val_406 -406 val_406 406 val_406 -66 val_66 66 val_66 -98 val_98 98 val_98 -98 val_98 98 val_98 -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value, 
yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx -JOIN -(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx -JOIN -(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 
Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.52 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 18 Data size: 4896 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 18 Data size: 4896 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: 
SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx -JOIN -(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx -JOIN -(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 val_128 128 1 -128 val_128 128 1 -128 val_128 128 1 -146 val_146 146 1 -146 val_146 146 1 -150 val_150 150 1 -213 val_213 213 1 -213 val_213 213 1 -224 val_224 224 1 -224 val_224 224 1 -238 val_238 238 1 -238 val_238 238 1 -255 val_255 255 1 -255 val_255 255 1 -273 val_273 273 1 -273 val_273 273 1 -273 val_273 273 1 -278 val_278 278 1 -278 val_278 278 1 -311 val_311 311 1 -311 val_311 311 1 -311 val_311 311 1 -369 val_369 369 1 -369 val_369 369 1 -369 val_369 369 1 -401 val_401 401 1 -401 val_401 401 1 -401 val_401 401 1 -401 val_401 401 1 -401 val_401 401 1 -406 val_406 406 1 -406 val_406 406 1 -406 val_406 406 1 -406 val_406 406 1 -66 val_66 66 1 -98 val_98 98 1 -98 val_98 98 1 -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx -JOIN -(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx -JOIN -(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY 
y.key ORDER BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.52 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - 
Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 18 Data size: 4896 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 18 Data size: 4896 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx -JOIN -(SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy -ON (xx.key=yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.value, yy.key, yy.value -FROM -(SELECT x.key as key, x.value as value FROM src x CLUSTER BY key) xx -JOIN -(SELECT y.key as key, count(*) as value FROM src1 
y GROUP BY y.key ORDER BY key) yy -ON (xx.key=yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 val_128 128 1 -128 val_128 128 1 -128 val_128 128 1 -146 val_146 146 1 -146 val_146 146 1 -150 val_150 150 1 -213 val_213 213 1 -213 val_213 213 1 -224 val_224 224 1 -224 val_224 224 1 -238 val_238 238 1 -238 val_238 238 1 -255 val_255 255 1 -255 val_255 255 1 -273 val_273 273 1 -273 val_273 273 1 -273 val_273 273 1 -278 val_278 278 1 -278 val_278 278 1 -311 val_311 311 1 -311 val_311 311 1 -311 val_311 311 1 -369 val_369 369 1 -369 val_369 369 1 -369 val_369 369 1 -401 val_401 401 1 -401 val_401 401 1 -401 val_401 401 1 -401 val_401 401 1 -401 val_401 401 1 -406 val_406 406 1 -406 val_406 406 1 -406 val_406 406 1 -406 val_406 406 1 -66 val_66 66 1 -98 val_98 98 1 -98 val_98 98 1 diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer15.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer15.q.out deleted file mode 100644 index 2236a97d2e..0000000000 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer15.q.out +++ /dev/null @@ -1,449 +0,0 @@ -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt, yy.key -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -JOIN src yy -ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt, yy.key -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -JOIN src yy -ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez 
-#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 7 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: yy - filterExpr: key is not null (type: boolean) - 
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.5897436 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: 
Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) - null sort order: zzz - sort order: +++ - Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.cnt, yy.key -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -JOIN src yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -JOIN src yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A 
masked pattern was here #### -128 1 128 -128 1 128 -128 1 128 -146 1 146 -146 1 146 -150 1 150 -213 1 213 -213 1 213 -224 1 224 -224 1 224 -238 1 238 -238 1 238 -255 1 255 -255 1 255 -273 1 273 -273 1 273 -273 1 273 -278 1 278 -278 1 278 -311 1 311 -311 1 311 -311 1 311 -369 1 369 -369 1 369 -369 1 369 -401 1 401 -401 1 401 -401 1 401 -401 1 401 -401 1 401 -406 1 406 -406 1 406 -406 1 406 -406 1 406 -66 1 66 -98 1 98 -98 1 98 -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt, yy.key -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -JOIN src yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt, yy.key -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -JOIN src yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) 
- null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: yy - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) 
- 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.5897436 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 25 Data size: 4525 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: 
SELECT xx.key, xx.cnt, yy.key -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -JOIN src yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key -FROM -(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx -JOIN src yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 1 128 -128 1 128 -128 1 128 -146 1 146 -146 1 146 -150 1 150 -213 1 213 -213 1 213 -224 1 224 -224 1 224 -238 1 238 -238 1 238 -255 1 255 -255 1 255 -273 1 273 -273 1 273 -273 1 273 -278 1 278 -278 1 278 -311 1 311 -311 1 311 -311 1 311 -369 1 369 -369 1 369 -369 1 369 -401 1 401 -401 1 401 -401 1 401 -401 1 401 -401 1 401 -406 1 406 -406 1 406 -406 1 406 -406 1 406 -66 1 66 -98 1 98 -98 1 98 diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer5.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer5.q.out deleted file mode 100644 index 0f7cfa4a98..0000000000 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer5.q.out +++ /dev/null @@ -1,802 +0,0 @@ -PREHOOK: query: CREATE TABLE T1_n19(key INT, val STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@T1_n19 -POSTHOOK: query: CREATE TABLE T1_n19(key INT, val STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@T1_n19 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1_n19 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@t1_n19 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1_n19 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@t1_n19 -PREHOOK: query: CREATE 
TABLE T2_n11(key INT, val STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@T2_n11 -POSTHOOK: query: CREATE TABLE T2_n11(key INT, val STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@T2_n11 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T2_n11 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@t2_n11 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T2_n11 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@t2_n11 -PREHOOK: query: CREATE TABLE T3_n5(key INT, val STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@T3_n5 -POSTHOOK: query: CREATE TABLE T3_n5(key INT, val STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@T3_n5 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE T3_n5 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@t3_n5 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE T3_n5 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@t3_n5 -PREHOOK: query: CREATE TABLE T4_n1(key INT, val STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@T4_n1 -POSTHOOK: query: CREATE TABLE T4_n1(key INT, val STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@T4_n1 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' INTO TABLE T4_n1 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@t4_n1 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' INTO TABLE T4_n1 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@t4_n1 -PREHOOK: query: CREATE TABLE 
dest_co1(key INT, val STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest_co1 -POSTHOOK: query: CREATE TABLE dest_co1(key INT, val STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest_co1 -PREHOOK: query: CREATE TABLE dest_co2(key INT, val STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest_co2 -POSTHOOK: query: CREATE TABLE dest_co2(key INT, val STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest_co2 -PREHOOK: query: CREATE TABLE dest_co3(key INT, val STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest_co3 -POSTHOOK: query: CREATE TABLE dest_co3(key INT, val STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest_co3 -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest_co1 -SELECT b.key, d.val -FROM -(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b -JOIN -(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d -ON b.key = d.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n19 -PREHOOK: Input: default@t2_n11 -PREHOOK: Input: default@t3_n5 -PREHOOK: Input: default@t4_n1 -PREHOOK: Output: default@dest_co1 -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest_co1 -SELECT b.key, d.val -FROM -(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b -JOIN -(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d -ON b.key = d.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n19 -POSTHOOK: Input: default@t2_n11 -POSTHOOK: Input: default@t3_n5 -POSTHOOK: Input: default@t4_n1 -POSTHOOK: Output: default@dest_co1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - 
Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: n - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Filter Operator - 
predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: m - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition 
map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co1 - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - -PREHOOK: query: INSERT OVERWRITE TABLE dest_co1 -SELECT b.key, d.val -FROM -(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = 
y.key)) b -JOIN -(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d -ON b.key = d.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n19 -PREHOOK: Input: default@t2_n11 -PREHOOK: Input: default@t3_n5 -PREHOOK: Input: default@t4_n1 -PREHOOK: Output: default@dest_co1 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_co1 -SELECT b.key, d.val -FROM -(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b -JOIN -(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d -ON b.key = d.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n19 -POSTHOOK: Input: default@t2_n11 -POSTHOOK: Input: default@t3_n5 -POSTHOOK: Input: default@t4_n1 -POSTHOOK: Output: default@dest_co1 -POSTHOOK: Lineage: dest_co1.key SIMPLE [(t1_n19)x.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: dest_co1.val SIMPLE [(t4_n1)n.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest_co2 -SELECT b.key, d.val -FROM -(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b -JOIN -(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d -ON b.key = d.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n19 -PREHOOK: Input: default@t2_n11 -PREHOOK: Input: default@t3_n5 -PREHOOK: Input: default@t4_n1 -PREHOOK: Output: default@dest_co2 -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest_co2 -SELECT b.key, d.val -FROM -(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b -JOIN -(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d -ON b.key = d.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n19 -POSTHOOK: Input: default@t2_n11 -POSTHOOK: Input: default@t3_n5 -POSTHOOK: Input: default@t4_n1 -POSTHOOK: Output: default@dest_co2 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### 
A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: n - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: 
key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: m - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - 
Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - -PREHOOK: query: INSERT OVERWRITE TABLE dest_co2 -SELECT b.key, d.val -FROM -(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) 
b -JOIN -(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d -ON b.key = d.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n19 -PREHOOK: Input: default@t2_n11 -PREHOOK: Input: default@t3_n5 -PREHOOK: Input: default@t4_n1 -PREHOOK: Output: default@dest_co2 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_co2 -SELECT b.key, d.val -FROM -(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b -JOIN -(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d -ON b.key = d.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n19 -POSTHOOK: Input: default@t2_n11 -POSTHOOK: Input: default@t3_n5 -POSTHOOK: Input: default@t4_n1 -POSTHOOK: Output: default@dest_co2 -POSTHOOK: Lineage: dest_co2.key SIMPLE [(t1_n19)x.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: dest_co2.val SIMPLE [(t4_n1)n.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest_co3 -SELECT b.key, d.val -FROM -(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b -JOIN -(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d -ON b.key = d.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n19 -PREHOOK: Input: default@t2_n11 -PREHOOK: Input: default@t3_n5 -PREHOOK: Input: default@t4_n1 -PREHOOK: Output: default@dest_co3 -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest_co3 -SELECT b.key, d.val -FROM -(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b -JOIN -(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d -ON b.key = d.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n19 -POSTHOOK: Input: default@t2_n11 -POSTHOOK: Input: default@t3_n5 -POSTHOOK: Input: default@t4_n1 -POSTHOOK: Output: default@dest_co3 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked 
pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: n - 
filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 4 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co3 - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: m - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: 
Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co3 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - -PREHOOK: query: INSERT OVERWRITE TABLE dest_co3 -SELECT b.key, d.val -FROM -(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b -JOIN -(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d -ON b.key = d.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n19 -PREHOOK: Input: default@t2_n11 -PREHOOK: Input: default@t3_n5 -PREHOOK: Input: default@t4_n1 -PREHOOK: Output: default@dest_co3 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_co3 -SELECT b.key, d.val -FROM -(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b -JOIN -(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d -ON b.key = d.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n19 -POSTHOOK: Input: default@t2_n11 -POSTHOOK: Input: default@t3_n5 -POSTHOOK: Input: default@t4_n1 -POSTHOOK: Output: default@dest_co3 -POSTHOOK: Lineage: dest_co3.key SIMPLE [(t1_n19)x.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: dest_co3.val SIMPLE [(t4_n1)n.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT SUM(HASH(key)), 
SUM(HASH(val)) FROM dest_co1 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_co1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_co1 -#### A masked pattern was here #### -8409 7619696771 -PREHOOK: query: SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co2 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_co2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_co2 -#### A masked pattern was here #### -8409 7619696771 -PREHOOK: query: SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co3 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_co3 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_co3 -#### A masked pattern was here #### -8409 7619696771 diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer7.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer7.q.out deleted file mode 100644 index 9193efb034..0000000000 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer7.q.out +++ /dev/null @@ -1,740 +0,0 @@ -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root 
stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.94871795 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: 
string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: yy - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 3 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 810 Basic stats: COMPLETE Column stats: 
COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 3 128 -146 2 146 val_146 -150 1 150 val_150 -213 2 213 val_213 -224 2 224 -238 2 238 val_238 -255 2 255 val_255 -273 3 273 val_273 -278 2 278 val_278 -311 3 311 val_311 -369 3 369 -401 5 401 val_401 -406 4 406 val_406 -66 1 66 val_66 -98 2 98 val_98 -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here 
#### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.94871795 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: 
COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: yy - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 3 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 3 128 -146 2 146 val_146 -150 1 150 val_150 -213 2 213 val_213 -224 2 224 -238 2 238 val_238 -255 2 255 val_255 -273 3 273 val_273 -278 2 278 val_278 -311 3 311 val_311 -369 3 369 -401 5 401 val_401 -406 4 406 val_406 -66 1 66 val_66 -98 2 98 val_98 -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 
(BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.94871795 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort 
order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: yy - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 3 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 3 128 -146 2 146 val_146 -150 1 150 val_150 -213 2 213 val_213 -224 2 224 -238 2 238 val_238 -255 2 255 val_255 -273 3 273 val_273 -278 2 278 val_278 -311 3 311 val_311 -369 3 369 -401 5 401 val_401 -406 4 406 val_406 -66 1 66 val_66 -98 2 98 val_98 -PREHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map 
Operator Tree: - TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 39 Data size: 3393 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.94871795 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - 
Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: yy - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 3 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 810 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - 
ListSink - -PREHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key, yy.value -FROM (SELECT x.key AS key, count(1) AS cnt - FROM src x JOIN src1 y ON (x.key = y.key) - GROUP BY x.key) xx -JOIN src1 yy -ON xx.key=yy.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 3 128 -146 2 146 val_146 -150 1 150 val_150 -213 2 213 val_213 -224 2 224 -238 2 238 val_238 -255 2 255 val_255 -273 3 273 val_273 -278 2 278 val_278 -311 3 311 val_311 -369 3 369 -401 5 401 val_401 -406 4 406 val_406 -66 1 66 val_66 -98 2 98 val_98 diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer8.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer8.q.out deleted file mode 100644 index 97ac105264..0000000000 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer8.q.out +++ /dev/null @@ -1,1219 +0,0 @@ -PREHOOK: query: EXPLAIN -SELECT x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: 
default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 7 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: x1 - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: 
key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 
14060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src 
-PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 3 -146 val_146 2 -150 val_150 1 -213 val_213 2 -224 2 -238 val_238 2 -255 val_255 2 -273 val_273 3 -278 val_278 2 -311 val_311 3 -369 3 -401 val_401 5 -406 val_406 4 -PREHOOK: query: EXPLAIN -SELECT x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 7 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) 
(type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: x1 - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: x - 
filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 46 Data size: 8418 
Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 3 -146 val_146 2 -150 val_150 1 -213 val_213 2 -224 2 -238 val_238 2 -255 val_255 2 -273 val_273 3 -278 
val_278 2 -311 val_311 3 -369 3 -401 val_401 5 -406 val_406 4 -PREHOOK: query: EXPLAIN -SELECT subq1.key, subq1.cnt, x.key, x.value -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value -) subq1 -LEFT OUTER JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT subq1.key, subq1.cnt, x.key, x.value -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value -) subq1 -LEFT OUTER JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 5 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort 
order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: x1 - filterExpr: ((UDFToDouble(key) > 100.0D) or key is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: value - Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: value (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - 
LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 87 Data size: 8273 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 113 Data size: 15686 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 113 Data size: 15686 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 87 Data size: 8273 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT subq1.key, 
subq1.cnt, x.key, x.value -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value -) subq1 -LEFT OUTER JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT subq1.key, subq1.cnt, x.key, x.value -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value -) subq1 -LEFT OUTER JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### - 3 - 3 - 3 - 3 - 3 val_165 - 3 val_193 - 3 val_265 - 3 val_27 - 3 val_409 - 3 val_484 -0 3 NULL NULL -10 1 NULL NULL -11 1 NULL NULL -12 2 NULL NULL -15 2 NULL NULL -17 1 NULL NULL -18 2 NULL NULL -19 1 NULL NULL -2 1 NULL NULL -4 1 NULL NULL -5 3 NULL NULL -8 1 NULL NULL -9 1 NULL NULL -val_146 1 NULL NULL -val_150 1 NULL NULL -val_213 1 NULL NULL -val_238 1 NULL NULL -val_255 1 NULL NULL -val_273 1 NULL NULL -val_278 1 NULL NULL -val_311 1 NULL NULL -val_401 1 NULL NULL -val_406 1 NULL NULL -PREHOOK: query: EXPLAIN -SELECT subq1.key, subq1.cnt, x.key, x.value -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value -) subq1 -LEFT OUTER JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT subq1.key, subq1.cnt, x.key, x.value -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.value as key, count(1) as cnt from src1 x1 where 
x1.key > 100 group by x1.value -) subq1 -LEFT OUTER JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 5 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: x1 - filterExpr: ((UDFToDouble(key) > 100.0D) or key is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: value - 
Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: value (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 87 Data size: 8273 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 
- keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 113 Data size: 15686 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 113 Data size: 15686 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 87 Data size: 8273 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT subq1.key, subq1.cnt, x.key, x.value -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value -) subq1 -LEFT OUTER JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT subq1.key, subq1.cnt, x.key, x.value -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.value as key, count(1) as cnt from src1 x1 where x1.key > 100 group by x1.value -) subq1 -LEFT OUTER JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY 
-POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### - 3 - 3 - 3 - 3 - 3 val_165 - 3 val_193 - 3 val_265 - 3 val_27 - 3 val_409 - 3 val_484 -0 3 NULL NULL -10 1 NULL NULL -11 1 NULL NULL -12 2 NULL NULL -15 2 NULL NULL -17 1 NULL NULL -18 2 NULL NULL -19 1 NULL NULL -2 1 NULL NULL -4 1 NULL NULL -5 3 NULL NULL -8 1 NULL NULL -9 1 NULL NULL -val_146 1 NULL NULL -val_150 1 NULL NULL -val_213 1 NULL NULL -val_238 1 NULL NULL -val_255 1 NULL NULL -val_273 1 NULL NULL -val_278 1 NULL NULL -val_311 1 NULL NULL -val_401 1 NULL NULL -val_406 1 NULL NULL -PREHOOK: query: EXPLAIN -SELECT x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key, x1.value -) subq1 -JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key, x1.value -) subq1 -JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 7 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) - 
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: x1 - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 26344 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 74 Data size: 13764 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 74 Data size: 13764 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no 
inputs - Map 7 - Map Operator Tree: - TableScan - alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) - outputColumnNames: 
_col0, _col1, _col2 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 74 Data size: 13764 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -WARNING: Comparing a bigint and a string may result in a loss of precision. 
-PREHOOK: query: EXPLAIN -SELECT subq1.key, subq1.value, x.key, x.value -FROM -( SELECT cast(x.key as INT) as key, count(1) as value from src x where x.key < 20 group by x.key - UNION ALL - SELECT count(1) as key, cast(x1.key as INT) as value from src x1 where x1.key > 100 group by x1.key -) subq1 -FULL OUTER JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT subq1.key, subq1.value, x.key, x.value -FROM -( SELECT cast(x.key as INT) as key, count(1) as value from src x where x.key < 20 group by x.key - UNION ALL - SELECT count(1) as key, cast(x1.key as INT) as value from src x1 where x1.key > 100 group by x1.key -) subq1 -FULL OUTER JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 7 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: 
z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: x1 - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: double) - Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce 
Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToLong(UDFToInteger(_col0)) (type: bigint), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint), _col1 (type: bigint), UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 166 Data size: 3984 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: double) - Statistics: Num rows: 166 Data size: 3984 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Full Outer Join 0 to 1 - keys: - 0 _col2 (type: double) - 1 _col2 (type: double) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 241 Data size: 9741 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint), _col1 (type: bigint), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 241 Data size: 9741 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 241 Data size: 9741 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator 
Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint), UDFToLong(UDFToInteger(_col0)) (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint), _col1 (type: bigint), UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 166 Data size: 3984 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: double) - Statistics: Num rows: 166 Data size: 3984 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer9.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer9.q.out deleted file mode 100644 index 97be240b89..0000000000 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer9.q.out +++ /dev/null @@ -1,634 +0,0 @@ -PREHOOK: query: CREATE TABLE tmp_n2(c1 INT, c2 INT, c3 STRING, c4 STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tmp_n2 -POSTHOOK: query: CREATE TABLE tmp_n2(c1 INT, c2 INT, c3 STRING, c4 STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tmp_n2 -PREHOOK: query: INSERT OVERWRITE TABLE tmp_n2 -SELECT x.key, y.key, x.value, y.value FROM src x JOIN src y ON (x.key = y.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@tmp_n2 -POSTHOOK: 
query: INSERT OVERWRITE TABLE tmp_n2 -SELECT x.key, y.key, x.value, y.value FROM src x JOIN src y ON (x.key = y.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@tmp_n2 -POSTHOOK: Lineage: tmp_n2.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tmp_n2.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tmp_n2.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tmp_n2.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN -SELECT xx.key, yy.key, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx -JOIN -(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy -ON (xx.key = yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, yy.key, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx -JOIN -(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy -ON (xx.key = yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: ((c1 < 120) and (c1 > 100)) (type: boolean) - Statistics: Num rows: 1028 Data size: 4112 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((c1 < 120) and (c1 > 100)) (type: boolean) - Statistics: Num rows: 198 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE - Group 
By Operator - aggregations: count() - keys: c1 (type: int) - minReductionHashAggr: 0.70202017 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: x1 - filterExpr: ((c2 > 100) and (c2 < 120)) (type: boolean) - Statistics: Num rows: 1028 Data size: 4112 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((c2 > 100) and (c2 < 120)) (type: boolean) - Statistics: Num rows: 198 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: c2 (type: int) - minReductionHashAggr: 0.70202017 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 59 Data size: 708 Basic 
stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 59 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 59 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 59 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, yy.key, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx -JOIN -(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy -ON (xx.key = yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, yy.key, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx -JOIN -(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy -ON (xx.key = yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -103 103 4 4 -104 104 4 4 -105 105 1 1 -111 111 1 1 -113 113 4 4 -114 114 1 1 -116 116 1 1 -118 118 4 4 -119 119 9 9 -PREHOOK: query: EXPLAIN -SELECT xx.key, yy.key, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx -JOIN -(SELECT x1.c2 AS key, count(1) AS cnt FROM 
tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy -ON (xx.key = yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key, yy.key, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx -JOIN -(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy -ON (xx.key = yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: ((c1 < 120) and (c1 > 100)) (type: boolean) - Statistics: Num rows: 1028 Data size: 4112 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((c1 < 120) and (c1 > 100)) (type: boolean) - Statistics: Num rows: 198 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: c1 (type: int) - minReductionHashAggr: 0.70202017 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: x1 - filterExpr: ((c2 > 100) and (c2 < 120)) (type: boolean) - Statistics: Num rows: 1028 Data size: 4112 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((c2 > 100) and (c2 < 120)) 
(type: boolean) - Statistics: Num rows: 198 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: c2 (type: int) - minReductionHashAggr: 0.70202017 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 59 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 59 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 59 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 59 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key, yy.key, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx -JOIN -(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy -ON (xx.key = yy.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key, yy.key, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1) xx -JOIN -(SELECT x1.c2 AS key, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy -ON (xx.key = yy.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -103 103 4 4 -104 104 4 4 -105 105 1 1 -111 111 1 1 -113 113 4 4 -114 114 1 1 -116 116 1 1 -118 118 4 4 -119 119 9 9 -PREHOOK: query: EXPLAIN -SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx -JOIN -(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy -ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) -PREHOOK: type: QUERY -PREHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx -JOIN -(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy -ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - 
Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: ((c1 < 120) and c3 is not null) (type: boolean) - Statistics: Num rows: 1028 Data size: 97660 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((c1 < 120) and c3 is not null) (type: boolean) - Statistics: Num rows: 248 Data size: 23560 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: c1 (type: int), c3 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: x1 - filterExpr: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) - Statistics: Num rows: 1028 Data size: 101772 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) - Statistics: Num rows: 198 Data size: 19602 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: c1 (type: int), c3 (type: string) - outputColumnNames: c1, c3 - Statistics: Num rows: 198 Data size: 19602 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: c1 (type: int), c3 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE 
Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch 
Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx -JOIN -(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy -ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) -PREHOOK: type: QUERY -PREHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx -JOIN -(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy -ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -103 val_103 103 val_103 4 4 -104 val_104 104 val_104 4 4 -105 val_105 105 val_105 1 1 -111 val_111 111 val_111 1 1 -113 val_113 113 val_113 4 4 -114 val_114 114 val_114 1 1 -116 val_116 116 val_116 1 1 -118 val_118 118 val_118 4 4 -119 val_119 119 val_119 9 9 -PREHOOK: query: EXPLAIN -SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx -JOIN -(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy -ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) -PREHOOK: type: QUERY -PREHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx -JOIN -(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy -ON (xx.key1 = 
yy.key1 AND xx.key2 == yy.key2) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: x - filterExpr: ((c1 < 120) and c3 is not null) (type: boolean) - Statistics: Num rows: 1028 Data size: 97660 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((c1 < 120) and c3 is not null) (type: boolean) - Statistics: Num rows: 248 Data size: 23560 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: c1 (type: int), c3 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: x1 - filterExpr: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) - Statistics: Num rows: 1028 Data size: 101772 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) - Statistics: Num rows: 198 Data size: 19602 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: c1 (type: int), c3 (type: string) - outputColumnNames: c1, c3 - Statistics: Num rows: 198 Data size: 19602 Basic stats: COMPLETE Column stats: 
COMPLETE - Group By Operator - aggregations: count() - keys: c1 (type: int), c3 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 99 Data size: 10197 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 124 Data size: 12772 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 99 Data size: 20394 Basic stats: COMPLETE Column stats: 
COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx -JOIN -(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy -ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) -PREHOOK: type: QUERY -PREHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt -FROM -(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp_n2 x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx -JOIN -(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp_n2 x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy -ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmp_n2 -#### A masked pattern was here #### -103 val_103 103 val_103 4 4 -104 val_104 104 val_104 4 4 -105 val_105 105 val_105 1 1 -111 val_111 111 val_111 1 1 -113 val_113 113 val_113 4 4 -114 val_114 114 val_114 1 1 -116 val_116 116 val_116 1 1 -118 val_118 118 val_118 4 4 -119 val_119 119 val_119 9 9 diff --git a/ql/src/test/results/clientpositive/llap/cp_sel.q.out b/ql/src/test/results/clientpositive/llap/cp_sel.q.out deleted file mode 100644 index 4fb741011b..0000000000 --- a/ql/src/test/results/clientpositive/llap/cp_sel.q.out +++ /dev/null @@ -1,260 +0,0 @@ -PREHOOK: query: explain -select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 order by 1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 
-#### A masked pattern was here #### -POSTHOOK: query: explain -select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 order by 1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: (11.0D = 11.0D) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) - null sort order: z - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 1 - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 
'hello' (type: string), 'world' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 order by 1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 order by 1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -#### A masked pattern was here #### -0 val_0 hello world -PREHOOK: query: create table testpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by(key) sorted by(key) into 2 buckets -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@testpartbucket -POSTHOOK: query: create table testpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by(key) sorted by(key) into 2 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@testpartbucket -PREHOOK: query: explain -insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Output: default@testpartbucket -POSTHOOK: query: explain -insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: (11.0D = 11.0D) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), 'hello' (type: string), 'world' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1000 Data size: 356000 Basic stats: COMPLETE Column 
stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.testpartbucket - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: key, value, ds, hr - Statistics: Num rows: 1000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string), hr (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1058 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data 
size: 1058 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - ds - hr - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.testpartbucket - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.testpartbucket - -PREHOOK: query: insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Output: default@testpartbucket -POSTHOOK: query: insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Output: default@testpartbucket@ds=hello/hr=world -POSTHOOK: Lineage: testpartbucket PARTITION(ds=hello,hr=world).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: testpartbucket PARTITION(ds=hello,hr=world).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select * from testpartbucket limit 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@testpartbucket -PREHOOK: Input: default@testpartbucket@ds=hello/hr=world -#### A masked pattern was 
here #### -POSTHOOK: query: select * from testpartbucket limit 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@testpartbucket -POSTHOOK: Input: default@testpartbucket@ds=hello/hr=world -#### A masked pattern was here #### -10 val_10 hello world -10 val_10 hello world -104 val_104 hello world -PREHOOK: query: drop table testpartbucket -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@testpartbucket -PREHOOK: Output: default@testpartbucket -POSTHOOK: query: drop table testpartbucket -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@testpartbucket -POSTHOOK: Output: default@testpartbucket diff --git a/ql/src/test/results/clientpositive/llap/cross_join_merge.q.out b/ql/src/test/results/clientpositive/llap/cross_join_merge.q.out deleted file mode 100644 index 5fae51d9c8..0000000000 --- a/ql/src/test/results/clientpositive/llap/cross_join_merge.q.out +++ /dev/null @@ -1,653 +0,0 @@ -Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -PREHOOK: query: explain -select src1.key from src src1 join src src2 join src src3 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select src1.key from src src1 join src src2 join src src3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) - Reducer 3 <- Map 4 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - 
outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - Statistics: Num rows: 125000000 Data size: 10875000000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 125000000 Data size: 10875000000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select src1.key from src src1 join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select src1.key from src src1 join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - 
alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - 
Processor Tree: - ListSink - -PREHOOK: query: explain -select src1.key from src src1 join src src2 join src src3 where src1.key=src2.key and src1.key=src3.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select src1.key from src src1 join src src2 join src src3 where src1.key=src2.key and src1.key=src3.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data 
size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, 
$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -PREHOOK: query: explain -select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (key is not null or (UDFToDouble(key) = 5.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) = 5.0D) (type: boolean) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE 
Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src3 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - Statistics: Num rows: 197750 Data size: 17204250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 197750 Data size: 17204250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 
- Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -PREHOOK: query: explain -select src1.key from src src1 left outer join src src2 join src src3 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select src1.key from src src1 left outer join src src2 join src src3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Map 4 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - 
sort order: - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - Statistics: Num rows: 125000000 Data size: 10875000000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 125000000 Data size: 10875000000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select src1.key from src src1 left outer join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select src1.key from src src1 left outer join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 
3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution 
mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/cte_6.q.out b/ql/src/test/results/clientpositive/llap/cte_6.q.out deleted file mode 100644 index 0649491fc3..0000000000 --- a/ql/src/test/results/clientpositive/llap/cte_6.q.out +++ /dev/null @@ -1,62 +0,0 @@ -PREHOOK: query: explain -with Q1 as ( select key from sRc where key = '5') -select CPS.key from Q1 CPS -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -with Q1 as ( select key from sRc where key = '5') -select CPS.key from Q1 CPS -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - 
Processor Tree: - TableScan - alias: src - filterExpr: (key = '5') (type: boolean) - Filter Operator - predicate: (key = '5') (type: boolean) - Select Operator - expressions: '5' (type: string) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain -with Q1 as ( select key from q2 where key = '5'), -Q2 as ( select key from sRc where key = '5') -select CPS.key from Q1 CPS -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -with Q1 as ( select key from q2 where key = '5'), -Q2 as ( select key from sRc where key = '5') -select CPS.key from Q1 CPS -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - filterExpr: (key = '5') (type: boolean) - Filter Operator - predicate: (key = '5') (type: boolean) - Select Operator - expressions: '5' (type: string) - outputColumnNames: _col0 - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/default_constraint.q.out b/ql/src/test/results/clientpositive/llap/default_constraint.q.out index 858dd5ab63..a04da4e2b4 100644 --- a/ql/src/test/results/clientpositive/llap/default_constraint.q.out +++ b/ql/src/test/results/clientpositive/llap/default_constraint.q.out @@ -1902,7 +1902,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: enforce_constraint(_col0 is not null) (type: boolean) + predicate: enforce_constraint(127Y is not null) (type: boolean) Statistics: Num rows: 1 Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: double), CAST( _col5 AS decimal(9,2)) (type: decimal(9,2)) 
@@ -2248,7 +2248,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: enforce_constraint(_col0 is not null) (type: boolean) + predicate: enforce_constraint(108Y is not null) (type: boolean) Statistics: Num rows: 1 Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: double), CAST( _col5 AS decimal(9,2)) (type: decimal(9,2)) diff --git a/ql/src/test/results/clientpositive/llap/distinct_groupby.q.out b/ql/src/test/results/clientpositive/llap/distinct_groupby.q.out deleted file mode 100644 index b396e454c7..0000000000 --- a/ql/src/test/results/clientpositive/llap/distinct_groupby.q.out +++ /dev/null @@ -1,2328 +0,0 @@ -PREHOOK: query: explain select distinct key from src1 group by key,value -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct key from src1 group by key,value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct key from src1 group by key,value -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct key from src1 group by key,value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### - -128 -146 -150 -213 -224 -238 -255 -273 -278 -311 -369 -401 -406 -66 -98 -PREHOOK: query: explain select distinct count(value) from src group by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was 
here #### -POSTHOOK: query: explain select distinct count(value) from src group by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(value) - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: bigint) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: 
Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct count(value) from src group by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select distinct count(value) from src group by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -2 -3 -5 -1 -4 -PREHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 
- filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.9166667 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct count(*) from src1 where key in (128,146,150) -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct count(*) from src1 where key in (128,146,150) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -3 -PREHOOK: query: explain select distinct * from (select distinct count(*) from src1 where 
key in (128,146,150)) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.9166667 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -3 -PREHOOK: query: explain select distinct count(*)+1 from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct count(*)+1 from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.96 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: 
mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: (_col0 + 1L) (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct count(*)+1 from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct count(*)+1 from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -26 -PREHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - 
predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 39 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1), count(_col3) - minReductionHashAggr: 0.974359 - mode: hash - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -37 37 -PREHOOK: query: explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - 
Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: bigint) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - 
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -1 -PREHOOK: query: explain select distinct key from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct key from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 25 Data size: 
2150 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string) - minReductionHashAggr: 0.52 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct key from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct key from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### - -146 -213 -273 -311 -369 -406 -66 -98 -128 -150 -224 -238 -255 -278 -401 -PREHOOK: query: explain select distinct * from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct * from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A 
masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct * from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct * from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked 
pattern was here #### - - val_27 - val_409 - val_484 -128 -150 val_150 -213 val_213 -224 -238 val_238 -273 val_273 -278 val_278 -401 val_401 -406 val_406 -98 val_98 - val_165 - val_193 - val_265 -146 val_146 -255 val_255 -311 val_311 -369 -66 val_66 -PREHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) group by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) group by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - 
aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: bigint) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct count(*) from src1 where key in (128,146,150) group by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct count(*) from src1 where key in (128,146,150) group by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -1 -PREHOOK: query: explain select distinct key, count(*) from src1 where key in (128,146,150) group by key -PREHOOK: type: QUERY -PREHOOK: 
Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct key, count(*) from src1 where key in (128,146,150) group by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct key, count(*) from src1 where key in (128,146,150) group by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct key, count(*) from src1 where key in (128,146,150) group by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -146 1 -128 1 -150 1 -PREHOOK: query: explain select distinct * from (select * from src1) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct * from (select * from src1) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 12 Data size: 2100 Basic 
stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct * from (select * from src1) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct * from (select * from src1) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### - - val_27 - val_409 - val_484 -128 -150 val_150 -213 val_213 -224 -238 val_238 -273 val_273 -278 val_278 -401 val_401 -406 val_406 -98 val_98 - val_165 - val_193 - val_265 -146 val_146 -255 val_255 -311 val_311 -369 -66 val_66 -PREHOOK: query: explain select distinct * from (select count(*) from src1) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct * from (select count(*) from src1) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct * from (select count(*) from src1) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### 
-POSTHOOK: query: select distinct * from (select count(*) from src1) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -25 -PREHOOK: query: explain select distinct * from (select * from src1 where key in (128,146,150)) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct * from (select * from src1 where key in (128,146,150)) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct * from (select * from src1 where key in (128,146,150)) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct * from (select * from src1 where key in (128,146,150)) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 -150 val_150 -146 val_146 -PREHOOK: query: explain select distinct key from (select * from src1 where key in (128,146,150)) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct key from (select * from src1 where key in (128,146,150)) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string) - 
minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct key from (select * from src1 where key in (128,146,150)) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct key from (select * from src1 where key in (128,146,150)) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -146 -128 -150 -PREHOOK: query: explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE 
PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.9166667 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct * from (select count(*) from src1 where key in (128,146,150)) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: 
select distinct * from (select count(*) from src1 where key in (128,146,150)) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -3 -PREHOOK: query: explain select distinct sum(key) over () from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct sum(key) over () from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: 0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: 0 ASC NULLS FIRST - partition by: 0 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col0 - name: sum - window function: GenericUDAFSumDouble - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - Statistics: Num rows: 25 Data size: 8850 
Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: sum_window_0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: double) - minReductionHashAggr: 0.52 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct sum(key) over () from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct sum(key) over () from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -3556.0 -PREHOOK: query: explain select distinct * from (select sum(key) over () from src1) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct * from (select sum(key) over () from src1) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was 
here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: 0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: 0 ASC NULLS FIRST - partition by: 0 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col0 - name: sum - window function: GenericUDAFSumDouble - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: sum_window_0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: double) - minReductionHashAggr: 0.52 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: 
double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct * from (select sum(key) over () from src1) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct * from (select sum(key) over () from src1) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -3556.0 -PREHOOK: query: explain select distinct value, key, count(1) over (partition by value) from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct value, key, count(1) over (partition by value) from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE 
Column stats: COMPLETE - Reduce Output Operator - key expressions: value (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string, _col1: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: count_window_0 - arguments: 1 - name: count - window function: GenericUDAFCountEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), count_window_0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: 
Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct value, key, count(1) over (partition by value) from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct value, key, count(1) over (partition by value) from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### - 128 7 - 369 7 -val_150 150 1 -val_165 1 -val_193 1 -val_238 238 1 -val_255 255 1 -val_265 1 -val_273 273 1 -val_278 278 1 -val_311 311 1 -val_401 401 1 -val_484 1 -val_66 66 1 -val_98 98 1 - 7 - 224 7 -val_146 146 1 -val_213 213 1 -val_27 1 -val_406 406 1 -val_409 1 -PREHOOK: query: explain select value, key, count(1) over (partition by value) from src1 group by value, key -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select value, key, count(1) over (partition by value) from src1 group by value, key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was 
here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: 
ptf_0 - output shape: _col0: string, _col1: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: count_window_0 - arguments: 1 - name: count - window function: GenericUDAFCountEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), count_window_0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select value, key, count(1) over (partition by value) from src1 group by value, key -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select value, key, count(1) over (partition by value) from src1 group by value, key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### - 369 4 - 4 - 128 4 - 224 4 -val_146 146 1 -val_150 150 1 -val_213 213 1 -val_238 238 1 -val_278 278 1 -val_406 406 1 -val_409 1 -val_66 66 1 -val_165 1 -val_193 1 -val_255 255 1 -val_265 1 -val_27 1 -val_273 273 1 -val_311 311 1 -val_401 401 1 -val_484 1 -val_98 98 1 -PREHOOK: query: explain select value, key, count(1) over (partition by value) from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### 
-POSTHOOK: query: explain select value, key, count(1) over (partition by value) from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: value (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string, _col1: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: count_window_0 - arguments: 1 - name: count - window function: GenericUDAFCountEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), count_window_0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 4575 Basic stats: 
COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select value, key, count(1) over (partition by value) from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select value, key, count(1) over (partition by value) from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### - 7 - 7 - 7 - 7 - 128 7 - 369 7 - 224 7 -val_146 146 1 -val_150 150 1 -val_213 213 1 -val_238 238 1 -val_278 278 1 -val_406 406 1 -val_409 1 -val_66 66 1 -val_165 1 -val_193 1 -val_255 255 1 -val_265 1 -val_27 1 -val_273 273 1 -val_311 311 1 -val_401 401 1 -val_484 1 -val_98 98 1 -PREHOOK: query: explain select distinct count(*)+key from src1 group by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct count(*)+key from src1 group by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column 
stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.52 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: (UDFToDouble(_col1) + UDFToDouble(_col0)) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: double) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct count(*)+key from src1 group by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct count(*)+key from src1 group by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -67.0 -129.0 -151.0 -214.0 -225.0 -239.0 -279.0 -402.0 -407.0 -99.0 -147.0 -256.0 -274.0 -312.0 -370.0 -NULL -PREHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE 
Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 39 Data size: 10413 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1), count(_col3) - keys: _col0 (type: string) - minReductionHashAggr: 0.94871795 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data 
size: 206 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: bigint), _col2 (type: bigint) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: bigint) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key -PREHOOK: type: 
QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -1 1 -2 2 -3 3 -4 4 -5 5 -PREHOOK: query: explain select distinct * from (select distinct * from src1) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: explain select distinct * from (select distinct * from src1) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: 
string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select distinct * from (select distinct * from src1) as T -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select distinct * from (select distinct * from src1) as T -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### - - val_27 - val_409 - val_484 -128 -150 val_150 -213 val_213 -224 -238 val_238 -273 val_273 -278 val_278 -401 val_401 -406 val_406 -98 val_98 - val_165 - val_193 - val_265 -146 val_146 -255 val_255 -311 val_311 -369 -66 val_66 diff --git a/ql/src/test/results/clientpositive/llap/distinct_stats.q.out b/ql/src/test/results/clientpositive/llap/distinct_stats.q.out deleted file mode 100644 index f0daa4c4f1..0000000000 --- a/ql/src/test/results/clientpositive/llap/distinct_stats.q.out +++ /dev/null @@ -1,250 +0,0 @@ -PREHOOK: query: create table t1_n11 (a string, b string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t1_n11 -POSTHOOK: query: create table t1_n11 (a string, b string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t1_n11 -PREHOOK: query: insert into table t1_n11 select * from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@t1_n11 -POSTHOOK: query: insert into table t1_n11 select * from src -POSTHOOK: type: QUERY -POSTHOOK: 
Input: default@src -POSTHOOK: Output: default@t1_n11 -POSTHOOK: Lineage: t1_n11.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: t1_n11.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: analyze table t1_n11 compute statistics for columns a,b -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@t1_n11 -PREHOOK: Output: default@t1_n11 -#### A masked pattern was here #### -POSTHOOK: query: analyze table t1_n11 compute statistics for columns a,b -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@t1_n11 -POSTHOOK: Output: default@t1_n11 -#### A masked pattern was here #### -PREHOOK: query: explain -select count(distinct b) from t1_n11 group by a -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n11 -#### A masked pattern was here #### -POSTHOOK: query: explain -select count(distinct b) from t1_n11 group by a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n11 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1_n11 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: string), b (type: string) - outputColumnNames: a, b - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: a (type: string), b (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - 
Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select distinct(b) from t1_n11 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n11 -#### A masked pattern was here #### -POSTHOOK: query: explain -select distinct(b) from t1_n11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n11 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1_n11 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: b (type: string) - 
outputColumnNames: b - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: b (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select a, count(*) from t1_n11 group by a -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n11 -#### A masked pattern was here #### -POSTHOOK: query: explain -select a, count(*) from t1_n11 group by a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n11 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1_n11 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE 
Column stats: COMPLETE - Select Operator - expressions: a (type: string) - outputColumnNames: a - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: a (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: drop table t1_n11 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@t1_n11 -PREHOOK: Output: default@t1_n11 -POSTHOOK: query: drop table t1_n11 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@t1_n11 -POSTHOOK: Output: default@t1_n11 diff --git a/ql/src/test/results/clientpositive/llap/distinct_windowing.q.out b/ql/src/test/results/clientpositive/llap/distinct_windowing.q.out deleted file mode 100644 index 4cc5bcbb79..0000000000 --- 
a/ql/src/test/results/clientpositive/llap/distinct_windowing.q.out +++ /dev/null @@ -1,575 +0,0 @@ -PREHOOK: query: drop table over10k_n15 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table over10k_n15 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table over10k_n15( - t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - `dec` decimal(4,2), - bin binary) - row format delimited - fields terminated by '|' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over10k_n15 -POSTHOOK: query: create table over10k_n15( - t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - `dec` decimal(4,2), - bin binary) - row format delimited - fields terminated by '|' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over10k_n15 -PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k_n15 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over10k_n15 -POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k_n15 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over10k_n15 -PREHOOK: query: EXPLAIN - SELECT fv - FROM (SELECT distinct first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n15) sq -ORDER BY fv - LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n15 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN - SELECT fv - FROM (SELECT distinct first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n15) sq -ORDER BY fv - LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n15 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - 
Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over10k_n15 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: si (type: smallint), i (type: int) - null sort order: az - sort order: ++ - Map-reduce partition columns: si (type: smallint) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: t (type: tinyint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: tinyint, _col1: smallint, _col2: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS LAST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: first_value_window_0 - arguments: _col0 - name: first_value - window function: GenericUDAFFirstValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: first_value_window_0 (type: tinyint) - null sort order: z - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Select Operator - expressions: first_value_window_0 (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: 
tinyint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: tinyint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - null sort order: z - sort order: + - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT fv - FROM (SELECT distinct first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n15) sq -ORDER BY fv - LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n15 -#### A masked pattern was here #### -POSTHOOK: query: SELECT fv - FROM (SELECT distinct first_value(t) OVER ( PARTITION 
BY si ORDER BY i ) AS fv - FROM over10k_n15) sq -ORDER BY fv - LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n15 -#### A masked pattern was here #### --2 --1 -0 -1 -2 -3 -4 -6 -7 -8 -PREHOOK: query: EXPLAIN - SELECT lv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv - FROM over10k_n15) sq -ORDER BY lv - LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n15 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN - SELECT lv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv - FROM over10k_n15) sq -ORDER BY lv - LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n15 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over10k_n15 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: si (type: smallint), i (type: int) - null sort order: az - sort order: ++ - Map-reduce partition columns: si (type: smallint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: smallint, _col2: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: 
windowingtablefunction - order by: _col2 ASC NULLS LAST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: last_value_window_0 - arguments: _col2 - name: last_value - window function: GenericUDAFLastValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: last_value_window_0 (type: int) - null sort order: z - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Select Operator - expressions: last_value_window_0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT lv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv - FROM over10k_n15) sq -ORDER BY lv - LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n15 -#### A masked pattern was here #### -POSTHOOK: query: SELECT lv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv - FROM over10k_n15) sq -ORDER BY lv - LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n15 -#### A masked pattern was here #### -65536 -65537 -65538 -65539 -65540 -65541 -65542 -65543 -65544 -65545 -PREHOOK: query: EXPLAIN - SELECT lv, fv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv, - first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n15) sq -ORDER BY lv, fv - LIMIT 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n15 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN - SELECT lv, fv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv, - first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n15) sq -ORDER BY lv, fv - LIMIT 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n15 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map 
Operator Tree: - TableScan - alias: over10k_n15 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: si (type: smallint), i (type: int) - null sort order: az - sort order: ++ - Map-reduce partition columns: si (type: smallint) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: t (type: tinyint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: tinyint, _col1: smallint, _col2: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS LAST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: last_value_window_0 - arguments: _col2 - name: last_value - window function: GenericUDAFLastValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - window function definition - alias: first_value_window_1 - arguments: _col0 - name: first_value - window function: GenericUDAFFirstValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++ - keys: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) - null sort order: zz - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - top n: 50 - Select Operator - expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 
12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: tinyint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: tinyint) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: tinyint) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: tinyint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: tinyint) - null sort order: zz - sort order: ++ - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: tinyint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 50 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 50 - Processor Tree: - ListSink - -PREHOOK: query: SELECT lv, fv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si 
ORDER BY i ) AS lv, - first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n15) sq -ORDER BY lv, fv - LIMIT 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n15 -#### A masked pattern was here #### -POSTHOOK: query: SELECT lv, fv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv, - first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n15) sq -ORDER BY lv, fv - LIMIT 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n15 -#### A masked pattern was here #### -65536 -2 -65536 2 -65536 9 -65536 12 -65536 13 -65536 18 -65536 22 -65536 23 -65536 37 -65536 39 -65536 42 -65536 48 -65536 55 -65536 56 -65536 58 -65536 61 -65536 69 -65536 71 -65536 73 -65536 75 -65536 78 -65536 80 -65536 83 -65536 84 -65536 88 -65536 95 -65536 104 -65536 107 -65536 108 -65536 111 -65536 114 -65536 118 -65536 119 -65536 121 -65537 4 -65537 8 -65537 9 -65537 11 -65537 18 -65537 22 -65537 25 -65537 36 -65537 38 -65537 51 -65537 53 -65537 54 -65537 55 -65537 56 -65537 57 -65537 59 diff --git a/ql/src/test/results/clientpositive/llap/distinct_windowing_no_cbo.q.out b/ql/src/test/results/clientpositive/llap/distinct_windowing_no_cbo.q.out deleted file mode 100644 index 6ee9ad4e5b..0000000000 --- a/ql/src/test/results/clientpositive/llap/distinct_windowing_no_cbo.q.out +++ /dev/null @@ -1,985 +0,0 @@ -PREHOOK: query: drop table over10k_n14 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table over10k_n14 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table over10k_n14( - t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - `dec` decimal(4,2), - bin binary) - row format delimited - fields terminated by '|' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over10k_n14 -POSTHOOK: query: create table over10k_n14( - t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts 
timestamp, - `dec` decimal(4,2), - bin binary) - row format delimited - fields terminated by '|' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over10k_n14 -PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k_n14 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over10k_n14 -POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k_n14 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over10k_n14 -PREHOOK: query: EXPLAIN - SELECT fv - FROM (SELECT distinct first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n14) sq -ORDER BY fv - LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN - SELECT fv - FROM (SELECT distinct first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n14) sq -ORDER BY fv - LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over10k_n14 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: si (type: smallint), i (type: int) - null sort order: az - sort order: ++ - Map-reduce partition columns: si (type: smallint) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: t (type: tinyint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - 
Select Operator - expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: tinyint, _col1: smallint, _col2: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS LAST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: first_value_window_0 - arguments: _col0 - name: first_value - window function: GenericUDAFFirstValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: first_value_window_0 (type: tinyint) - null sort order: z - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Select Operator - expressions: first_value_window_0 (type: tinyint) - outputColumnNames: first_value_window_0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: first_value_window_0 (type: tinyint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: tinyint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output 
Operator - key expressions: _col0 (type: tinyint) - null sort order: z - sort order: + - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT fv - FROM (SELECT distinct first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n14) sq -ORDER BY fv - LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -POSTHOOK: query: SELECT fv - FROM (SELECT distinct first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n14) sq -ORDER BY fv - LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### --2 --1 -0 -1 -2 -3 -4 -6 -7 -8 -PREHOOK: query: EXPLAIN - SELECT lv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv - FROM over10k_n14) sq -ORDER BY lv - LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN - SELECT lv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv - FROM over10k_n14) sq -ORDER BY lv - LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n14 -#### A 
masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over10k_n14 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: si (type: smallint), i (type: int) - null sort order: az - sort order: ++ - Map-reduce partition columns: si (type: smallint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: smallint, _col2: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS LAST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: last_value_window_0 - arguments: _col2 - name: last_value - window function: GenericUDAFLastValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: last_value_window_0 (type: int) - null sort order: z - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Select Operator - expressions: last_value_window_0 (type: int) - outputColumnNames: last_value_window_0 - Statistics: Num rows: 
1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: last_value_window_0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT lv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv - FROM over10k_n14) sq -ORDER BY lv - LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### 
-POSTHOOK: query: SELECT lv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv - FROM over10k_n14) sq -ORDER BY lv - LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -65536 -65537 -65538 -65539 -65540 -65541 -65542 -65543 -65544 -65545 -PREHOOK: query: EXPLAIN - SELECT lv, fv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv, - first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n14) sq -ORDER BY lv, fv - LIMIT 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN - SELECT lv, fv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv, - first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n14) sq -ORDER BY lv, fv - LIMIT 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over10k_n14 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: si (type: smallint), i (type: int) - null sort order: az - sort order: ++ - Map-reduce partition columns: si (type: smallint) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: t (type: tinyint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: 
int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: tinyint, _col1: smallint, _col2: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS LAST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: last_value_window_0 - arguments: _col2 - name: last_value - window function: GenericUDAFLastValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - window function definition - alias: first_value_window_1 - arguments: _col0 - name: first_value - window function: GenericUDAFFirstValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++ - keys: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) - null sort order: zz - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - top n: 50 - Select Operator - expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) - outputColumnNames: last_value_window_0, first_value_window_1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: tinyint) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: tinyint) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 3 - Execution mode: 
vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: tinyint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: tinyint) - null sort order: zz - sort order: ++ - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: tinyint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 50 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 50 - Processor Tree: - ListSink - -PREHOOK: query: SELECT lv, fv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv, - first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n14) sq -ORDER BY lv, fv - LIMIT 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -POSTHOOK: query: SELECT lv, fv - FROM (SELECT distinct last_value(i) OVER ( PARTITION BY si ORDER BY i ) AS lv, - first_value(t) OVER ( PARTITION BY si ORDER BY i ) AS fv - FROM over10k_n14) sq -ORDER BY lv, fv - LIMIT 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -65536 -2 -65536 2 -65536 9 -65536 12 -65536 13 -65536 18 
-65536 22 -65536 23 -65536 37 -65536 39 -65536 42 -65536 48 -65536 55 -65536 56 -65536 58 -65536 61 -65536 69 -65536 71 -65536 73 -65536 75 -65536 78 -65536 80 -65536 83 -65536 84 -65536 88 -65536 95 -65536 104 -65536 107 -65536 108 -65536 111 -65536 114 -65536 118 -65536 119 -65536 121 -65537 4 -65537 8 -65537 9 -65537 11 -65537 18 -65537 22 -65537 25 -65537 36 -65537 38 -65537 51 -65537 53 -65537 54 -65537 55 -65537 56 -65537 57 -65537 59 -PREHOOK: query: explain -select si, max(f) mf, rank() over ( partition by si order by mf ) r -FROM over10k_n14 -GROUP BY si -HAVING max(f) > 0 -ORDER BY si, r -limit 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -POSTHOOK: query: explain -select si, max(f) mf, rank() over ( partition by si order by mf ) r -FROM over10k_n14 -GROUP BY si -HAVING max(f) > 0 -ORDER BY si, r -limit 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over10k_n14 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: si (type: smallint), f (type: float) - outputColumnNames: si, f - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(f) - keys: si (type: smallint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 
(type: smallint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: float) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - keys: KEY._col0 (type: smallint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 0.0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint), _col1 (type: float) - null sort order: az - sort order: ++ - Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: float) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: smallint, _col1: float - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS LAST - partition by: _col0 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col1 - name: rank - window function: GenericUDAFRankEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++ - keys: _col0 (type: smallint), rank_window_0 (type: int) - null sort order: zz - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - top n: 50 - Select 
Operator - expressions: _col0 (type: smallint), _col1 (type: float), rank_window_0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint), _col2 (type: int) - null sort order: zz - sort order: ++ - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: float) - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: float), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 50 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 50 - Processor Tree: - ListSink - -PREHOOK: query: select si, max(f) mf, rank() over ( partition by si order by mf ) r -FROM over10k_n14 -GROUP BY si -HAVING max(f) > 0 -ORDER BY si, r -limit 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -POSTHOOK: query: select si, max(f) mf, rank() over ( partition by si order by mf ) r -FROM over10k_n14 -GROUP BY si -HAVING max(f) > 0 -ORDER BY si, r -limit 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -256 94.87 1 -257 98.0 1 -258 98.19 1 -259 99.71 1 -260 99.78 1 -261 98.09 1 -262 98.41 1 -263 97.32 1 -264 97.65 1 -265 96.18 1 -266 99.41 1 -267 
99.8 1 -268 93.34 1 -269 99.24 1 -270 98.57 1 -271 96.02 1 -272 92.82 1 -273 97.51 1 -274 95.43 1 -275 99.68 1 -276 98.94 1 -277 97.26 1 -278 98.56 1 -279 98.09 1 -280 99.21 1 -281 99.32 1 -282 95.49 1 -283 96.46 1 -284 99.34 1 -285 99.34 1 -286 92.77 1 -287 99.29 1 -288 96.71 1 -289 97.13 1 -290 99.88 1 -291 99.18 1 -292 94.99 1 -293 95.36 1 -294 99.34 1 -295 90.67 1 -296 96.85 1 -297 95.62 1 -298 99.98 1 -299 99.36 1 -300 98.76 1 -301 99.08 1 -302 99.84 1 -303 98.57 1 -304 94.68 1 -305 96.1 1 -PREHOOK: query: explain -select distinct si, rank() over ( partition by si order by i ) r -FROM over10k_n14 -ORDER BY si, r -limit 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -POSTHOOK: query: explain -select distinct si, rank() over ( partition by si order by i ) r -FROM over10k_n14 -ORDER BY si, r -limit 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over10k_n14 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: si (type: smallint), i (type: int) - null sort order: az - sort order: ++ - Map-reduce partition columns: si (type: smallint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: smallint, _col2: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS LAST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col2 - name: rank - window function: GenericUDAFRankEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++ - keys: _col1 (type: smallint), rank_window_0 (type: int) - null sort order: zz - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - top n: 50 - Select Operator - expressions: rank_window_0 (type: int), _col1 (type: smallint) - outputColumnNames: rank_window_0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: smallint), rank_window_0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: smallint), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint), _col1 (type: int) - null sort order: zz - sort order: ++ 
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 50 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 50 - Processor Tree: - ListSink - -PREHOOK: query: select distinct si, rank() over ( partition by si order by i ) r -FROM over10k_n14 -ORDER BY si, r -limit 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -POSTHOOK: query: select distinct si, rank() over ( partition by si order by i ) r -FROM over10k_n14 -ORDER BY si, r -limit 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k_n14 -#### A masked pattern was here #### -256 1 -256 2 -256 3 -256 4 -256 5 -256 6 -256 7 -256 8 -256 9 -256 10 -256 11 -256 13 -256 14 -256 15 -256 16 -256 17 -256 18 -256 19 -256 20 -256 21 -256 22 -256 23 -256 24 -256 25 -256 26 -256 27 -256 28 -256 29 -256 30 -256 32 -256 33 -256 34 -256 35 -256 37 -257 1 -257 2 -257 3 -257 4 -257 5 -257 6 -257 7 -257 8 -257 9 -257 10 -257 11 -257 12 -257 13 -257 14 -257 16 -257 17 diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid2.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid2.q.out deleted file mode 100644 index 25f388e857..0000000000 --- 
a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid2.q.out +++ /dev/null @@ -1,103 +0,0 @@ -PREHOOK: query: CREATE TABLE non_acid(key string, value string) -PARTITIONED BY(ds string, hr int) -CLUSTERED BY(key) INTO 2 BUCKETS -STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@non_acid -POSTHOOK: query: CREATE TABLE non_acid(key string, value string) -PARTITIONED BY(ds string, hr int) -CLUSTERED BY(key) INTO 2 BUCKETS -STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@non_acid -PREHOOK: query: explain -insert into table non_acid partition(ds,hr) select * from srcpart sort by value -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@non_acid -POSTHOOK: query: explain -insert into table non_acid partition(ds,hr) select * from srcpart sort by value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) - 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: string), UDFToInteger(UDFToInteger(_col3)) (type: int), _bucket_number (type: string), _col1 (type: string) - null sort order: aaaa - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), UDFToInteger(UDFToInteger(_col3)) (type: int) - Statistics: Num rows: 2000 Data size: 732000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._bucket_number (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _bucket_number - File Output Operator - compressed: false - Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 2000 Data size: 732000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.non_acid - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - ds - hr - replace: false - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.non_acid - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.non_acid - diff --git a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out index 
6abd6f3c82..e96363fe29 100644 --- a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out +++ b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out @@ -3393,21 +3393,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_uami_n1 - filterExpr: ((de) IN (109.23, 119.23) and enforce_constraint(vc is not null)) (type: boolean) + filterExpr: (de) IN (109.23, 119.23) (type: boolean) Statistics: Num rows: 1002 Data size: 225450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((de) IN (109.23, 119.23) and enforce_constraint(vc is not null)) (type: boolean) - Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (de) IN (109.23, 119.23) (type: boolean) + Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), i (type: int), vc (type: varchar(128)) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1806 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1806 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col3 (type: varchar(128)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -3415,18 +3415,25 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 3.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128)) + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 3.14 (type: decimal(3,2)), VALUE._col1 (type: varchar(128)) 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Statistics: Num rows: 6 Data size: 1806 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: enforce_constraint((_col2 is not null and _col3 is not null)) (type: boolean) Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid_uami_n1 - Write Type: UPDATE + Select Operator + expressions: _col0 (type: struct), _col1 (type: int), CAST( _col2 AS decimal(5,2)) (type: decimal(5,2)), _col3 (type: varchar(128)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acid_uami_n1 + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -3492,21 +3499,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_uami_n1 - filterExpr: ((de = 3.14) and enforce_constraint((i is not null and vc is not null))) (type: boolean) + filterExpr: (de = 3.14) (type: boolean) Statistics: Num rows: 1002 Data size: 225450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((de = 3.14) and enforce_constraint((i is not null and vc is not null))) (type: boolean) - Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (de = 3.14) (type: boolean) + Statistics: Num rows: 3 Data size: 675 Basic 
stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), i (type: int), vc (type: varchar(128)) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 301 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) null sort order: z sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 301 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col3 (type: varchar(128)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -3514,18 +3521,25 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 3.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128)) + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 3.14159 (type: decimal(6,5)), VALUE._col1 (type: varchar(128)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 301 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Statistics: Num rows: 3 Data size: 903 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: enforce_constraint((_col1 is not null and _col3 is not null)) (type: boolean) Statistics: Num rows: 1 Data size: 301 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid_uami_n1 - Write Type: UPDATE + Select Operator + expressions: _col0 (type: struct), _col1 (type: int), CAST( _col2 AS decimal(5,2)) (type: 
decimal(5,2)), _col3 (type: varchar(128)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 301 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 301 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acid_uami_n1 + Write Type: UPDATE Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/escape_clusterby1.q.out b/ql/src/test/results/clientpositive/llap/escape_clusterby1.q.out deleted file mode 100644 index 968e1ade8e..0000000000 --- a/ql/src/test/results/clientpositive/llap/escape_clusterby1.q.out +++ /dev/null @@ -1,120 +0,0 @@ -PREHOOK: query: explain -select key, value from src cluster by key, value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select key, value from src cluster by key, value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select `key`, value from src cluster by `key`, value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select `key`, value from src cluster by `key`, value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/escape_distributeby1.q.out b/ql/src/test/results/clientpositive/llap/escape_distributeby1.q.out deleted file mode 100644 index 0b601fe14d..0000000000 --- a/ql/src/test/results/clientpositive/llap/escape_distributeby1.q.out +++ /dev/null @@ -1,120 +0,0 @@ -PREHOOK: query: explain -select key, value from src distribute by key, value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select key, value from src distribute by key, value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select `key`, value from src distribute by `key`, value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select `key`, value from src distribute by `key`, value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/escape_orderby1.q.out b/ql/src/test/results/clientpositive/llap/escape_orderby1.q.out deleted file mode 100644 index e384133465..0000000000 --- a/ql/src/test/results/clientpositive/llap/escape_orderby1.q.out +++ /dev/null @@ -1,118 +0,0 @@ -PREHOOK: query: explain -select key, value from src order by key, value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select key, value from src order by key, value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A 
masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select `key`, value from src order by `key`, value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select `key`, value from src order by `key`, value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan 
- alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/escape_sortby1.q.out b/ql/src/test/results/clientpositive/llap/escape_sortby1.q.out deleted file mode 100644 index 032af8ac48..0000000000 --- a/ql/src/test/results/clientpositive/llap/escape_sortby1.q.out +++ /dev/null @@ -1,118 +0,0 @@ -PREHOOK: query: explain -select key, value from src sort by key, value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select key, value from src sort by key, value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE 
PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select `key`, value from src sort by `key`, value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select `key`, value from src sort by `key`, value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - 
Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/except_all.q.out b/ql/src/test/results/clientpositive/llap/except_all.q.out deleted file mode 100644 index 4c2498f5a8..0000000000 --- a/ql/src/test/results/clientpositive/llap/except_all.q.out +++ /dev/null @@ -1,943 +0,0 @@ -PREHOOK: query: create table a_n15(key int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@a_n15 -POSTHOOK: query: create table a_n15(key int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@a_n15 -PREHOOK: query: insert 
into table a_n15 values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@a_n15 -POSTHOOK: query: insert into table a_n15 values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@a_n15 -POSTHOOK: Lineage: a_n15.key SCRIPT [] -PREHOOK: query: create table b_n11(key bigint) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@b_n11 -POSTHOOK: query: create table b_n11(key bigint) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@b_n11 -PREHOOK: query: insert into table b_n11 values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@b_n11 -POSTHOOK: query: insert into table b_n11 values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@b_n11 -POSTHOOK: Lineage: b_n11.key SCRIPT [] -PREHOOK: query: select * from a_n15 except all select * from b_n11 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n15 -PREHOOK: Input: default@b_n11 -#### A masked pattern was here #### -POSTHOOK: query: select * from a_n15 except all select * from b_n11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n15 -POSTHOOK: Input: default@b_n11 -#### A masked pattern was here #### -2 -2 -0 -PREHOOK: query: drop table a_n15 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@a_n15 -PREHOOK: Output: default@a_n15 -POSTHOOK: query: drop table a_n15 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@a_n15 -POSTHOOK: Output: default@a_n15 -PREHOOK: query: drop table b_n11 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@b_n11 -PREHOOK: Output: default@b_n11 -POSTHOOK: query: drop table b_n11 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@b_n11 -POSTHOOK: Output: 
default@b_n11 -PREHOOK: query: create table a_n15(key int, value int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@a_n15 -POSTHOOK: query: create table a_n15(key int, value int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@a_n15 -PREHOOK: query: insert into table a_n15 values (1,2),(1,2),(1,3),(2,3),(2,2) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@a_n15 -POSTHOOK: query: insert into table a_n15 values (1,2),(1,2),(1,3),(2,3),(2,2) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@a_n15 -POSTHOOK: Lineage: a_n15.key SCRIPT [] -POSTHOOK: Lineage: a_n15.value SCRIPT [] -PREHOOK: query: create table b_n11(key int, value int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@b_n11 -POSTHOOK: query: create table b_n11(key int, value int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@b_n11 -PREHOOK: query: insert into table b_n11 values (1,2),(2,3),(2,2),(2,2),(2,20) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@b_n11 -POSTHOOK: query: insert into table b_n11 values (1,2),(2,3),(2,2),(2,2),(2,20) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@b_n11 -POSTHOOK: Lineage: b_n11.key SCRIPT [] -POSTHOOK: Lineage: b_n11.value SCRIPT [] -PREHOOK: query: select * from a_n15 except all select * from b_n11 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n15 -PREHOOK: Input: default@b_n11 -#### A masked pattern was here #### -POSTHOOK: query: select * from a_n15 except all select * from b_n11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n15 -POSTHOOK: Input: default@b_n11 -#### A masked pattern was here #### -1 3 -1 2 -PREHOOK: query: select * from b_n11 except all select * from a_n15 -PREHOOK: type: QUERY 
-PREHOOK: Input: default@a_n15 -PREHOOK: Input: default@b_n11 -#### A masked pattern was here #### -POSTHOOK: query: select * from b_n11 except all select * from a_n15 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n15 -POSTHOOK: Input: default@b_n11 -#### A masked pattern was here #### -2 2 -2 20 -PREHOOK: query: select * from b_n11 except all select * from a_n15 intersect distinct select * from b_n11 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n15 -PREHOOK: Input: default@b_n11 -#### A masked pattern was here #### -POSTHOOK: query: select * from b_n11 except all select * from a_n15 intersect distinct select * from b_n11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n15 -POSTHOOK: Input: default@b_n11 -#### A masked pattern was here #### -2 2 -2 20 -PREHOOK: query: select * from b_n11 except all select * from a_n15 except distinct select * from b_n11 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n15 -PREHOOK: Input: default@b_n11 -#### A masked pattern was here #### -POSTHOOK: query: select * from b_n11 except all select * from a_n15 except distinct select * from b_n11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n15 -POSTHOOK: Input: default@b_n11 -#### A masked pattern was here #### -PREHOOK: query: select * from a_n15 except all select * from b_n11 union all select * from a_n15 except distinct select * from b_n11 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n15 -PREHOOK: Input: default@b_n11 -#### A masked pattern was here #### -POSTHOOK: query: select * from a_n15 except all select * from b_n11 union all select * from a_n15 except distinct select * from b_n11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n15 -POSTHOOK: Input: default@b_n11 -#### A masked pattern was here #### -1 3 -PREHOOK: query: select * from a_n15 except all select * from b_n11 union select * from a_n15 except distinct select * from b_n11 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n15 -PREHOOK: Input: default@b_n11 -#### A masked pattern was here #### 
-POSTHOOK: query: select * from a_n15 except all select * from b_n11 union select * from a_n15 except distinct select * from b_n11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n15 -POSTHOOK: Input: default@b_n11 -#### A masked pattern was here #### -1 3 -PREHOOK: query: select * from a_n15 except all select * from b_n11 except distinct select * from a_n15 except distinct select * from b_n11 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n15 -PREHOOK: Input: default@b_n11 -#### A masked pattern was here #### -POSTHOOK: query: select * from a_n15 except all select * from b_n11 except distinct select * from a_n15 except distinct select * from b_n11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n15 -POSTHOOK: Input: default@b_n11 -#### A masked pattern was here #### -PREHOOK: query: select * from (select a_n15.key, b_n11.value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub1 -except all -select * from (select a_n15.key, b_n11.value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub2 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n15 -PREHOOK: Input: default@b_n11 -#### A masked pattern was here #### -POSTHOOK: query: select * from (select a_n15.key, b_n11.value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub1 -except all -select * from (select a_n15.key, b_n11.value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n15 -POSTHOOK: Input: default@b_n11 -#### A masked pattern was here #### -PREHOOK: query: select * from (select a_n15.key, b_n11.value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub1 -except all -select * from (select b_n11.value as key, a_n15.key as value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub2 -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n15 -PREHOOK: Input: default@b_n11 -#### A masked pattern was here #### -POSTHOOK: query: select * from (select a_n15.key, b_n11.value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub1 -except all -select * from (select b_n11.value as key, 
a_n15.key as value from a_n15 join b_n11 on a_n15.key=b_n11.key)sub2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n15 -POSTHOOK: Input: default@b_n11 -#### A masked pattern was here #### -2 3 -2 3 -1 2 -1 2 -1 2 -2 20 -2 20 -PREHOOK: query: explain select * from src except all select * from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select * from src except all select * from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data 
size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 2L (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col3), sum(_col2) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data 
size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ((2L * _col2) - (3L * _col3)) (type: bigint), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - function name: UDTFReplicateRows - Select Operator - expressions: col1 (type: string), col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: 
COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 1L (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col3), sum(_col2) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint), _col3 (type: bigint) - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from src except all select * from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select * from src except all select * from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -PREHOOK: query: explain select * from src except all select * from src except distinct select * from src except distinct select * from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select * from src except all select * from src except distinct select * from src except distinct select * from src -POSTHOOK: type: 
QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 10 <- Map 1 (SIMPLE_EDGE), Union 6 (CONTAINS) - Reducer 11 <- Map 1 (SIMPLE_EDGE), Union 8 (CONTAINS) - Reducer 13 <- Map 12 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) - Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS) - Reducer 9 <- Union 8 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Reduce Output 
Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 12 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 10 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 1L (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 
(type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 251 Data size: 48516 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2), sum(_col3) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 125 Data size: 24250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 125 Data size: 24250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint), _col3 (type: bigint) - Reducer 11 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 1L (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 270 Data size: 52380 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2), sum(_col3) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 135 Data size: 26190 Basic stats: COMPLETE 
Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 135 Data size: 26190 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint), _col3 (type: bigint) - Reducer 13 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 1L (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col3), sum(_col2) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint), _col3 (type: bigint) - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: 
count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 2L (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col3), sum(_col2) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 48500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ((2L * _col2) - (3L * _col3)) (type: bigint), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - 
UDTF Operator - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - function name: UDTFReplicateRows - Select Operator - expressions: col1 (type: string), col2 (type: string) - outputColumnNames: col1, col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: col1 (type: string), col2 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 2L (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 251 Data size: 48516 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2), sum(_col3) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 125 
Data size: 24250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 125 Data size: 24250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint), _col3 (type: bigint) - Reducer 7 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 125 Data size: 24250 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((_col2 > 0L) and ((_col2 * 2L) = _col3)) (type: boolean) - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 2L (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint), (_col2 * _col3) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 270 Data size: 52380 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2), sum(_col3) - keys: _col0 (type: 
string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 135 Data size: 26190 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 135 Data size: 26190 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint), _col3 (type: bigint) - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 135 Data size: 26190 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((_col2 > 0L) and ((_col2 * 2L) = _col3)) (type: boolean) - Statistics: Num rows: 22 Data size: 4268 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 22 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 22 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 - Union 6 - Vertex: Union 6 - Union 8 - Vertex: Union 8 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from src except all select * from src except distinct select * from src except distinct select * from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked 
pattern was here #### -POSTHOOK: query: select * from src except all select * from src except distinct select * from src except distinct select * from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -PREHOOK: query: explain select value from a_n15 group by value except distinct select key from b_n11 group by key -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n15 -PREHOOK: Input: default@b_n11 -#### A masked pattern was here #### -POSTHOOK: query: explain select value from a_n15 group by value except distinct select key from b_n11 group by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n15 -POSTHOOK: Input: default@b_n11 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a_n15 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: int) - outputColumnNames: value - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: value (type: int) - minReductionHashAggr: 0.6 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: b_n11 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE 
Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: int) - minReductionHashAggr: 0.6 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), 2L (type: bigint), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1), sum(_col2) - keys: _col0 (type: int) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 
2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((_col1 > 0L) and ((_col1 * 2L) = _col2)) (type: boolean) - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), 1L (type: bigint), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 80 Basic stats: 
COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1), sum(_col2) - keys: _col0 (type: int) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select value from a_n15 group by value except distinct select key from b_n11 group by key -PREHOOK: type: QUERY -PREHOOK: Input: default@a_n15 -PREHOOK: Input: default@b_n11 -#### A masked pattern was here #### -POSTHOOK: query: select value from a_n15 group by value except distinct select key from b_n11 group by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@a_n15 -POSTHOOK: Input: default@b_n11 -#### A masked pattern was here #### -3 diff --git a/ql/src/test/results/clientpositive/llap/exec_parallel_column_stats.q.out b/ql/src/test/results/clientpositive/llap/exec_parallel_column_stats.q.out deleted file mode 100644 index 7756c05c2d..0000000000 --- a/ql/src/test/results/clientpositive/llap/exec_parallel_column_stats.q.out +++ /dev/null @@ -1,90 +0,0 @@ -PREHOOK: query: create table t_n25 as select * from src -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@t_n25 -POSTHOOK: query: create table t_n25 as select * from src -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t_n25 -POSTHOOK: Lineage: t_n25.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: t_n25.value 
SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain analyze table t_n25 compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@t_n25 -PREHOOK: Output: default@t_n25 -#### A masked pattern was here #### -POSTHOOK: query: explain analyze table t_n25 compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@t_n25 -POSTHOOK: Output: default@t_n25 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t_n25 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.t_n25 - -PREHOOK: query: analyze table t_n25 compute statistics for columns -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@t_n25 -PREHOOK: Output: default@t_n25 -#### A masked pattern was here #### -POSTHOOK: query: analyze table t_n25 compute statistics for columns -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@t_n25 -POSTHOOK: Output: default@t_n25 -#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/explain_ddl.q.out b/ql/src/test/results/clientpositive/llap/explain_ddl.q.out deleted file mode 100644 index 20a7ee846f..0000000000 --- a/ql/src/test/results/clientpositive/llap/explain_ddl.q.out +++ /dev/null @@ -1,663 +0,0 @@ -PREHOOK: query: CREATE VIEW V1_n0 AS SELECT key, value from src -PREHOOK: type: CREATEVIEW -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@V1_n0 -POSTHOOK: query: CREATE VIEW V1_n0 AS SELECT key, value from src -POSTHOOK: type: CREATEVIEW -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@V1_n0 -POSTHOOK: Lineage: V1_n0.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: V1_n0.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select count(*) from V1_n0 where key > 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@v1_n0 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from V1_n0 where key > 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@v1_n0 -#### A masked pattern 
was here #### -497 -PREHOOK: query: CREATE TABLE M1 AS SELECT key, value from src -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@M1 -POSTHOOK: query: CREATE TABLE M1 AS SELECT key, value from src -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@M1 -POSTHOOK: Lineage: m1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: m1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select count(*) from M1 where key > 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@m1 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from M1 where key > 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@m1 -#### A masked pattern was here #### -497 -PREHOOK: query: EXPLAIN CREATE TABLE M1 AS select * from src -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@M1 -POSTHOOK: query: EXPLAIN CREATE TABLE M1 AS select * from src -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@M1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE 
Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.M1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: col1, col2 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-4 - Create Table - columns: key string, value string - name: default.M1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat - serde name: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.M1 - - Stage: Stage-0 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: EXPLAIN CREATE TABLE M1 AS select * from M1 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@m1 -PREHOOK: Output: database:default -PREHOOK: Output: default@M1 -POSTHOOK: query: EXPLAIN CREATE TABLE M1 AS select * from M1 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@m1 -POSTHOOK: Output: database:default -POSTHOOK: Output: default@M1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: m1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.M1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: col1, col2 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: 
compute_stats(col1, 'hll'), compute_stats(col2, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-4 - Create Table - columns: key string, value string - name: default.M1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat - serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.M1 - - Stage: Stage-0 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: EXPLAIN CREATE TABLE M1 AS select * from V1_n0 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Input: default@v1_n0 -PREHOOK: Output: database:default -PREHOOK: Output: default@M1 -POSTHOOK: query: EXPLAIN CREATE TABLE M1 AS select * from V1_n0 -POSTHOOK: type: 
CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Input: default@v1_n0 -POSTHOOK: Output: database:default -POSTHOOK: Output: default@M1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - properties: - insideView TRUE - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.M1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: col1, col2 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - 
Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-4 - Create Table - columns: key string, value string - name: default.M1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat - serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.M1 - - Stage: Stage-0 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: EXPLAIN CREATE TABLE V1_n0 AS select * from M1 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@m1 -PREHOOK: Output: database:default -PREHOOK: Output: default@V1_n0 -POSTHOOK: query: EXPLAIN CREATE TABLE V1_n0 AS select * from M1 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@m1 -POSTHOOK: Output: database:default -POSTHOOK: Output: default@V1_n0 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: 
m1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.V1_n0 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: col1, col2 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 
- Dependency Collection - - Stage: Stage-4 - Create Table - columns: key string, value string - name: default.V1_n0 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat - serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.V1_n0 - - Stage: Stage-0 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: EXPLAIN CREATE VIEW V1_n0 AS select * from M1 -PREHOOK: type: CREATEVIEW -PREHOOK: Input: default@m1 -PREHOOK: Output: database:default -PREHOOK: Output: default@V1_n0 -POSTHOOK: query: EXPLAIN CREATE VIEW V1_n0 AS select * from M1 -POSTHOOK: type: CREATEVIEW -POSTHOOK: Input: default@m1 -POSTHOOK: Output: database:default -POSTHOOK: Output: default@V1_n0 -STAGE DEPENDENCIES: - Stage-1 is a root stage - -STAGE PLANS: - Stage: Stage-1 - Create View - columns: key string, value string - expanded text: select `m1`.`key`, `m1`.`value` from `default`.`M1` - name: default.V1_n0 - original text: select * from M1 - -PREHOOK: query: EXPLAIN CREATE TABLE M1 LIKE src -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@M1 -POSTHOOK: query: EXPLAIN CREATE TABLE M1 LIKE src -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@M1 -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Create Table - default input format: org.apache.hadoop.mapred.TextInputFormat - default output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat - default serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - like: src - name: default.M1 - table properties: - bucketing_version 2 - -PREHOOK: query: EXPLAIN CREATE TABLE M1 LIKE M1 -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: 
Output: default@M1 -POSTHOOK: query: EXPLAIN CREATE TABLE M1 LIKE M1 -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@M1 -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Create Table - default input format: org.apache.hadoop.mapred.TextInputFormat - default output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat - default serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - like: M1 - name: default.M1 - table properties: - bucketing_version 2 - -PREHOOK: query: EXPLAIN DROP TABLE M1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@m1 -PREHOOK: Output: default@m1 -POSTHOOK: query: EXPLAIN DROP TABLE M1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@m1 -POSTHOOK: Output: default@m1 -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Drop Table - table: M1 - -PREHOOK: query: select count(*) from M1 where key > 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@m1 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from M1 where key > 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@m1 -#### A masked pattern was here #### -497 -PREHOOK: query: EXPLAIN INSERT INTO M1 SELECT * FROM M1 -PREHOOK: type: QUERY -PREHOOK: Input: default@m1 -PREHOOK: Output: default@m1 -POSTHOOK: query: EXPLAIN INSERT INTO M1 SELECT * FROM M1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@m1 -POSTHOOK: Output: default@m1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: m1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: 
string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.m1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: false - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.m1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.m1 - -PREHOOK: query: select count(*) from M1 where key > 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@m1 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from M1 where key > 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@m1 -#### A masked pattern was here #### -497 -PREHOOK: query: EXPLAIN TRUNCATE TABLE M1 -PREHOOK: type: TRUNCATETABLE -PREHOOK: Output: default@m1 -POSTHOOK: query: EXPLAIN TRUNCATE TABLE M1 -POSTHOOK: type: TRUNCATETABLE -POSTHOOK: Output: default@m1 -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Truncate Table or Partition - table name: default.m1 - -PREHOOK: query: select count(*) from M1 where key > 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@m1 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from M1 where key > 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@m1 -#### A masked pattern was here #### -497 diff --git a/ql/src/test/results/clientpositive/llap/filter_aggr.q.out b/ql/src/test/results/clientpositive/llap/filter_aggr.q.out deleted file mode 100644 index 674fc40c81..0000000000 --- a/ql/src/test/results/clientpositive/llap/filter_aggr.q.out +++ /dev/null @@ -1,490 +0,0 @@ -PREHOOK: query: explain extended -select key, c, m from -( -select key, c, 1 as m from (select key, count(key) as c from src group by key)s1 -union all -select key, c, 2 as m from (select key, count(key) as c from src group by key)s2 -)sub -where m = 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain extended -select key, c, m from -( -select key, c, 1 as m from (select key, 
count(key) as c from src group by key)s1 -union all -select key, c, 2 as m from (select key, count(key) as c from src group by key)s2 -)sub -where m = 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -OPTIMIZED SQL: SELECT `key`, COUNT(`key`) AS `c`, 1 AS `m` -FROM `default`.`src` -GROUP BY `key` -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(key) - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - 
columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Reducer 2 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), 1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num 
rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:bigint:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select key, c, m from -( -select key, c, 1 as m from (select key, count(key) as c from src group by key)s1 -union all -select key, c, 2 as m from (select key, count(key) as c from src group by key)s2 -)sub -where m = 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select key, c, m from -( -select key, c, 1 as m from (select key, count(key) as c from src group by key)s1 -union all -select key, c, 2 as m from (select key, count(key) as c from src group by key)s2 -)sub -where m = 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -10 1 1 -100 2 1 -104 2 1 -105 1 1 -113 2 1 -116 1 1 -118 2 1 -125 2 1 -126 1 1 -129 2 1 -131 1 1 -136 1 1 -143 1 1 -145 1 1 -146 2 1 -15 2 1 -152 2 1 -153 1 1 -156 1 1 -158 1 1 -160 1 1 -163 1 1 -164 2 1 -165 2 1 -167 3 1 -17 1 1 -170 1 1 -174 2 1 -175 2 1 -178 1 1 -179 2 1 -18 2 1 -181 1 1 -189 1 1 -19 1 1 -190 1 1 -191 2 1 -193 3 1 -200 2 1 -201 1 1 -203 2 1 -207 2 1 -209 2 1 -213 2 1 -216 2 1 -222 1 1 -223 2 1 -228 1 1 -229 2 1 -235 1 1 -239 2 1 -24 2 1 -244 1 1 -247 1 1 -248 1 1 -249 1 1 -26 2 1 -263 1 1 -266 1 1 -27 1 1 -272 2 1 -273 3 1 -274 1 1 -28 1 1 -280 2 1 -282 2 1 -283 1 1 -284 1 1 -286 1 
1 -289 1 1 -298 3 1 -302 1 1 -305 1 1 -306 1 1 -308 1 1 -311 3 1 -317 2 1 -322 2 1 -323 1 1 -325 2 1 -327 3 1 -33 1 1 -331 2 1 -333 2 1 -336 1 1 -339 1 1 -341 1 1 -345 1 1 -35 3 1 -353 2 1 -360 1 1 -362 1 1 -364 1 1 -365 1 1 -369 3 1 -375 1 1 -377 1 1 -378 1 1 -379 1 1 -392 1 1 -393 1 1 -394 1 1 -399 2 1 -4 1 1 -402 1 1 -406 4 1 -409 3 1 -41 1 1 -411 1 1 -418 1 1 -419 1 1 -42 2 1 -421 1 1 -424 2 1 -43 1 1 -431 3 1 -435 1 1 -436 1 1 -437 1 1 -438 3 1 -439 2 1 -443 1 1 -448 1 1 -449 1 1 -453 1 1 -454 3 1 -455 1 1 -457 1 1 -458 2 1 -459 2 1 -460 1 1 -462 2 1 -466 3 1 -467 1 1 -469 5 1 -47 1 1 -472 1 1 -478 2 1 -479 1 1 -480 3 1 -484 1 1 -485 1 1 -490 1 1 -492 2 1 -493 1 1 -496 1 1 -497 1 1 -498 3 1 -53 1 1 -54 1 1 -57 1 1 -58 2 1 -66 1 1 -67 2 1 -69 1 1 -70 3 1 -77 1 1 -82 1 1 -84 2 1 -90 3 1 -97 2 1 -98 2 1 -0 3 1 -103 2 1 -11 1 1 -111 1 1 -114 1 1 -119 3 1 -12 2 1 -120 2 1 -128 3 1 -133 1 1 -134 2 1 -137 2 1 -138 4 1 -149 2 1 -150 1 1 -155 1 1 -157 1 1 -162 1 1 -166 1 1 -168 1 1 -169 4 1 -172 2 1 -176 2 1 -177 1 1 -180 1 1 -183 1 1 -186 1 1 -187 3 1 -192 1 1 -194 1 1 -195 2 1 -196 1 1 -197 2 1 -199 3 1 -2 1 1 -20 1 1 -202 1 1 -205 2 1 -208 3 1 -214 1 1 -217 2 1 -218 1 1 -219 2 1 -221 2 1 -224 2 1 -226 1 1 -230 5 1 -233 2 1 -237 2 1 -238 2 1 -241 1 1 -242 2 1 -252 1 1 -255 2 1 -256 2 1 -257 1 1 -258 1 1 -260 1 1 -262 1 1 -265 2 1 -275 1 1 -277 4 1 -278 2 1 -281 2 1 -285 1 1 -287 1 1 -288 2 1 -291 1 1 -292 1 1 -296 1 1 -30 1 1 -307 2 1 -309 2 1 -310 1 1 -315 1 1 -316 3 1 -318 3 1 -321 2 1 -332 1 1 -335 1 1 -338 1 1 -34 1 1 -342 2 1 -344 2 1 -348 5 1 -351 1 1 -356 1 1 -366 1 1 -367 2 1 -368 1 1 -37 2 1 -373 1 1 -374 1 1 -382 2 1 -384 3 1 -386 1 1 -389 1 1 -395 2 1 -396 3 1 -397 2 1 -400 1 1 -401 5 1 -403 3 1 -404 2 1 -407 1 1 -413 2 1 -414 2 1 -417 3 1 -427 1 1 -429 2 1 -430 3 1 -432 1 1 -44 1 1 -444 1 1 -446 1 1 -452 1 1 -463 2 1 -468 4 1 -470 1 1 -475 1 1 -477 1 1 -481 1 1 -482 1 1 -483 1 1 -487 1 1 -489 4 1 -491 1 1 -494 1 1 -495 1 1 -5 3 1 -51 2 1 -64 1 1 -65 1 1 
-72 2 1 -74 1 1 -76 2 1 -78 1 1 -8 1 1 -80 1 1 -83 2 1 -85 1 1 -86 1 1 -87 1 1 -9 1 1 -92 1 1 -95 2 1 -96 1 1 diff --git a/ql/src/test/results/clientpositive/llap/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/llap/filter_cond_pushdown.q.out deleted file mode 100644 index b78ea41263..0000000000 --- a/ql/src/test/results/clientpositive/llap/filter_cond_pushdown.q.out +++ /dev/null @@ -1,537 +0,0 @@ -PREHOOK: query: EXPLAIN -SELECT f.key, g.value -FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') -WHERE (f.key = m.key AND f.value='2008-04-08' AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT f.key, g.value -FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') -WHERE (f.key = m.key AND f.value='2008-04-08' AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: m - filterExpr: ((value <> '') and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((value <> '') and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (value = '2008-04-08') (type: boolean) - outputColumnNames: _col0, _col1, _col2 - 
Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: f - filterExpr: ((value) IN ('2008-04-08', '2008-04-09') and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((value) IN ('2008-04-08', '2008-04-09') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), (value = '2008-04-08') (type: boolean), (value = '2008-04-09') (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean), _col2 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: g - filterExpr: (value <> '') (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (value <> '') (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column 
stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col4 and _col2) or _col5)} - Statistics: Num rows: 3 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 3 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col3, _col6 - Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col3 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: EXPLAIN -SELECT f.key, g.value -FROM src f JOIN src m JOIN src g ON(g.value = m.value AND 
m.value is not null AND m.value !='') -WHERE (f.key = m.key AND f.value IN ('2008-04-08','2008-04-10') AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT f.key, g.value -FROM src f JOIN src m JOIN src g ON(g.value = m.value AND m.value is not null AND m.value !='') -WHERE (f.key = m.key AND f.value IN ('2008-04-08','2008-04-10') AND m.value='2008-04-08') OR (f.key = m.key AND f.value='2008-04-09') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: m - filterExpr: ((value <> '') and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((value <> '') and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (value = '2008-04-08') (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan 
- alias: f - filterExpr: ((value) IN ('2008-04-08', '2008-04-10', '2008-04-09') and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((value) IN ('2008-04-08', '2008-04-10', '2008-04-09') and key is not null) (type: boolean) - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), (value) IN ('2008-04-08', '2008-04-10') (type: boolean), (value = '2008-04-09') (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean), _col2 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: g - filterExpr: (value <> '') (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (value <> '') (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 
(type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col4 and _col2) or _col5)} - Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col3, _col6 - Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col3 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: EXPLAIN -SELECT t1.key -FROM cbo_t1 t1 -JOIN ( - SELECT t2.key - FROM cbo_t2 t2 - JOIN (SELECT * FROM cbo_t3 t3 WHERE c_int=1) t3 ON t2.key=t3.c_int - WHERE ((t2.key=t3.key) AND (t2.c_float + t3.c_float > 2)) OR - ((t2.key=t3.key) AND (t2.c_int + t3.c_int > 2))) t4 ON t1.key=t4.key -PREHOOK: type: QUERY -PREHOOK: Input: default@cbo_t1 -PREHOOK: Input: default@cbo_t1@dt=2014 -PREHOOK: Input: default@cbo_t2 -PREHOOK: Input: default@cbo_t2@dt=2014 -PREHOOK: Input: default@cbo_t3 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN 
-SELECT t1.key -FROM cbo_t1 t1 -JOIN ( - SELECT t2.key - FROM cbo_t2 t2 - JOIN (SELECT * FROM cbo_t3 t3 WHERE c_int=1) t3 ON t2.key=t3.c_int - WHERE ((t2.key=t3.key) AND (t2.c_float + t3.c_float > 2)) OR - ((t2.key=t3.key) AND (t2.c_int + t3.c_int > 2))) t4 ON t1.key=t4.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@cbo_t1 -POSTHOOK: Input: default@cbo_t1@dt=2014 -POSTHOOK: Input: default@cbo_t2 -POSTHOOK: Input: default@cbo_t2@dt=2014 -POSTHOOK: Input: default@cbo_t3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - filterExpr: (UDFToDouble(key) = 1.0D) (type: boolean) - Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) = 1.0D) (type: boolean) - Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 5370 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 5370 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: t2 - filterExpr: (UDFToDouble(key) = 1.0D) (type: boolean) - Statistics: Num rows: 20 Data size: 1767 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) = 1.0D) (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_float (type: float), ((c_int + 1) > 2) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: float), _col2 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: t3 - filterExpr: ((c_int = 1) and (UDFToDouble(key) = 1.0D)) (type: boolean) - Statistics: Num rows: 20 Data size: 1767 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((c_int = 1) and (UDFToDouble(key) = 1.0D)) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), c_float (type: float) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: float) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - table: - input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 - residual filter predicates: {(((_col1 + _col4) > 2.0) or _col2)} - Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: EXPLAIN -SELECT f.key, f.value, m.value -FROM src f JOIN src m ON(f.key = m.key AND m.value is not null AND m.value !='') -WHERE (f.value IN ('2008-04-08','2008-04-10') AND f.value IN ('2008-04-08','2008-04-09') AND m.value='2008-04-10') OR (m.value='2008-04-08') -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT f.key, f.value, m.value -FROM src f JOIN src m ON(f.key = m.key AND m.value is not null AND m.value !='') -WHERE (f.value IN ('2008-04-08','2008-04-10') AND f.value IN ('2008-04-08','2008-04-09') AND m.value='2008-04-10') OR (m.value='2008-04-08') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 
(SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: f - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (value) IN ('2008-04-08', '2008-04-10') (type: boolean), (value) IN ('2008-04-08', '2008-04-09') (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: boolean), _col3 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: m - filterExpr: ((value) IN ('2008-04-10', '2008-04-08') and (value <> '') and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((value) IN ('2008-04-10', '2008-04-08') and (value <> '') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (value = '2008-04-10') (type: boolean), (value = '2008-04-08') (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - 
Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: boolean), _col3 (type: boolean) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7 - residual filter predicates: {((_col2 and _col3 and _col6) or _col7)} - Statistics: Num rows: 2 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 538 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 538 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/filter_cond_pushdown2.q.out b/ql/src/test/results/clientpositive/llap/filter_cond_pushdown2.q.out deleted file mode 100644 index e99cad9543..0000000000 --- a/ql/src/test/results/clientpositive/llap/filter_cond_pushdown2.q.out +++ /dev/null @@ -1,316 +0,0 @@ -PREHOOK: query: drop table if exists users_table -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists users_table -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE users_table( - `field_1` int, - `field_2` string, - `field_3` boolean, - `field_4` boolean, - `field_5` boolean, - `field_6` boolean, - `field_7` boolean) -ROW FORMAT DELIMITED 
FIELDS TERMINATED BY ',' -STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@users_table -POSTHOOK: query: CREATE TABLE users_table( - `field_1` int, - `field_2` string, - `field_3` boolean, - `field_4` boolean, - `field_5` boolean, - `field_6` boolean, - `field_7` boolean) -ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' -STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@users_table -PREHOOK: query: load data local inpath '../../data/files/small_csv.csv' into table users_table -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@users_table -POSTHOOK: query: load data local inpath '../../data/files/small_csv.csv' into table users_table -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@users_table -PREHOOK: query: explain -with all_hits as ( -select * from users_table -), -all_exposed_users as ( -select distinct -field_1, -field_2 -from all_hits -where field_3 -), -interacted as ( -select distinct -field_1, -field_2 -from all_hits -where field_4 -) -select -all_exposed_users.field_1, -count(*) as nr_exposed, -sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted -from all_exposed_users -left outer join interacted -on all_exposed_users.field_1 = interacted.field_1 -and all_exposed_users.field_2 = interacted.field_2 -group by all_exposed_users.field_1 -order by all_exposed_users.field_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@users_table -#### A masked pattern was here #### -POSTHOOK: query: explain -with all_hits as ( -select * from users_table -), -all_exposed_users as ( -select distinct -field_1, -field_2 -from all_hits -where field_3 -), -interacted as ( -select distinct -field_1, -field_2 -from all_hits -where field_4 -) -select -all_exposed_users.field_1, -count(*) as nr_exposed, -sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted -from all_exposed_users 
-left outer join interacted -on all_exposed_users.field_1 = interacted.field_1 -and all_exposed_users.field_2 = interacted.field_2 -group by all_exposed_users.field_1 -order by all_exposed_users.field_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@users_table -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: users_table - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: field_3 (type: boolean) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: field_1 (type: int), field_2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: users_table - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: field_4 (type: boolean) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 
field_1 (type: int), field_2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(if(_col3 is not null, 1, 0)) - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: 
NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: with all_hits as ( -select * from users_table -), -all_exposed_users as ( -select distinct -field_1, -field_2 -from all_hits -where field_3 -), -interacted as ( -select distinct -field_1, -field_2 -from all_hits -where field_4 -) -select -all_exposed_users.field_1, -count(*) as nr_exposed, -sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted -from all_exposed_users -left outer join interacted -on all_exposed_users.field_1 = interacted.field_1 -and all_exposed_users.field_2 = interacted.field_2 -group by all_exposed_users.field_1 -order by all_exposed_users.field_1 -PREHOOK: type: QUERY -PREHOOK: Input: 
default@users_table -#### A masked pattern was here #### -POSTHOOK: query: with all_hits as ( -select * from users_table -), -all_exposed_users as ( -select distinct -field_1, -field_2 -from all_hits -where field_3 -), -interacted as ( -select distinct -field_1, -field_2 -from all_hits -where field_4 -) -select -all_exposed_users.field_1, -count(*) as nr_exposed, -sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted -from all_exposed_users -left outer join interacted -on all_exposed_users.field_1 = interacted.field_1 -and all_exposed_users.field_2 = interacted.field_2 -group by all_exposed_users.field_1 -order by all_exposed_users.field_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@users_table -#### A masked pattern was here #### -31651 1 1 -216051 1 0 -239413 1 1 -252028 1 0 -269414 1 0 -300632 1 1 -301813 1 0 -409611 1 1 -438598 1 0 -453386 1 0 -474061 1 1 -537646 1 0 -575004 1 1 -676296 1 1 -715496 1 1 -750354 1 0 -804209 1 1 -807477 1 0 diff --git a/ql/src/test/results/clientpositive/llap/filter_cond_pushdown_HIVE_15647.q.out b/ql/src/test/results/clientpositive/llap/filter_cond_pushdown_HIVE_15647.q.out deleted file mode 100644 index f26100c20a..0000000000 --- a/ql/src/test/results/clientpositive/llap/filter_cond_pushdown_HIVE_15647.q.out +++ /dev/null @@ -1,440 +0,0 @@ -PREHOOK: query: CREATE TABLE sales_HIVE_15647 (store_id INTEGER, store_number INTEGER, customer_id INTEGER) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@sales_HIVE_15647 -POSTHOOK: query: CREATE TABLE sales_HIVE_15647 (store_id INTEGER, store_number INTEGER, customer_id INTEGER) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@sales_HIVE_15647 -PREHOOK: query: CREATE TABLE store_HIVE_15647 (store_id INTEGER, salad_bar BOOLEAN) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@store_HIVE_15647 -POSTHOOK: query: CREATE TABLE store_HIVE_15647 (store_id 
INTEGER, salad_bar BOOLEAN) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@store_HIVE_15647 -PREHOOK: query: explain select count(*) from -sales_HIVE_15647 as sales -join store_HIVE_15647 as store on sales.store_id = store.store_id -where ((store.salad_bar)) and ((sales.store_number) <=> (sales.customer_id)) -PREHOOK: type: QUERY -PREHOOK: Input: default@sales_hive_15647 -PREHOOK: Input: default@store_hive_15647 -#### A masked pattern was here #### -POSTHOOK: query: explain select count(*) from -sales_HIVE_15647 as sales -join store_HIVE_15647 as store on sales.store_id = store.store_id -where ((store.salad_bar)) and ((sales.store_number) <=> (sales.customer_id)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@sales_hive_15647 -POSTHOOK: Input: default@store_hive_15647 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: sales - filterExpr: ((store_number IS NOT DISTINCT FROM customer_id) and store_id is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((store_number IS NOT DISTINCT FROM customer_id) and store_id is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: store_id (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: store_id (type: int) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: store - filterExpr: (store_id 
is not null and salad_bar) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (store_id is not null and salad_bar) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: store_id (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: store_id (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 store_id (type: int) - 1 store_id (type: int) - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select count(*) from -sales_HIVE_15647 as sales -join store_HIVE_15647 as store on sales.store_id = 
store.store_id -where ((store.salad_bar)) and ((sales.store_number) = (sales.customer_id)) -PREHOOK: type: QUERY -PREHOOK: Input: default@sales_hive_15647 -PREHOOK: Input: default@store_hive_15647 -#### A masked pattern was here #### -POSTHOOK: query: explain select count(*) from -sales_HIVE_15647 as sales -join store_HIVE_15647 as store on sales.store_id = store.store_id -where ((store.salad_bar)) and ((sales.store_number) = (sales.customer_id)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@sales_hive_15647 -POSTHOOK: Input: default@store_hive_15647 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: sales - filterExpr: ((store_number = customer_id) and store_id is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((store_number = customer_id) and store_id is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: store_id (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: store_id (type: int) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: store - filterExpr: (store_id is not null and salad_bar) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (store_id is not null and salad_bar) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output 
Operator - key expressions: store_id (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: store_id (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 store_id (type: int) - 1 store_id (type: int) - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select count(*) from -sales_HIVE_15647 as sales -join store_HIVE_15647 as store on sales.store_id = store.store_id -where ((store.salad_bar = true)) and ((sales.store_number) <=> (sales.customer_id)) -PREHOOK: type: QUERY -PREHOOK: Input: default@sales_hive_15647 -PREHOOK: Input: default@store_hive_15647 -#### A masked pattern was here #### -POSTHOOK: query: explain select count(*) from 
-sales_HIVE_15647 as sales -join store_HIVE_15647 as store on sales.store_id = store.store_id -where ((store.salad_bar = true)) and ((sales.store_number) <=> (sales.customer_id)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@sales_hive_15647 -POSTHOOK: Input: default@store_hive_15647 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: sales - filterExpr: ((store_number IS NOT DISTINCT FROM customer_id) and store_id is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((store_number IS NOT DISTINCT FROM customer_id) and store_id is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: store_id (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: store_id (type: int) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: store - filterExpr: (store_id is not null and (salad_bar = true)) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (store_id is not null and (salad_bar = true)) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: store_id (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: store_id (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 store_id (type: int) - 1 store_id (type: int) - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select count(*) from -sales_HIVE_15647 as sales -join store_HIVE_15647 as store on sales.store_id = store.store_id -where ((store.salad_bar = false)) and ((sales.store_number) > (sales.customer_id)) -PREHOOK: type: QUERY -PREHOOK: Input: default@sales_hive_15647 -PREHOOK: Input: default@store_hive_15647 -#### A masked pattern was here #### -POSTHOOK: query: explain select count(*) from -sales_HIVE_15647 as sales -join store_HIVE_15647 as store on sales.store_id = store.store_id -where ((store.salad_bar = false)) and ((sales.store_number) > (sales.customer_id)) -POSTHOOK: type: QUERY -POSTHOOK: Input: 
default@sales_hive_15647 -POSTHOOK: Input: default@store_hive_15647 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: sales - filterExpr: (store_id is not null and (store_number > customer_id)) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (store_id is not null and (store_number > customer_id)) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: store_id (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: store_id (type: int) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: store - filterExpr: (store_id is not null and (salad_bar = false)) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (store_id is not null and (salad_bar = false)) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: store_id (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: store_id (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 store_id (type: int) - 1 store_id (type: int) - Statistics: Num 
rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/filter_in_or_dup.q.out b/ql/src/test/results/clientpositive/llap/filter_in_or_dup.q.out deleted file mode 100644 index 3186cbc3a5..0000000000 --- a/ql/src/test/results/clientpositive/llap/filter_in_or_dup.q.out +++ /dev/null @@ -1,108 +0,0 @@ -PREHOOK: query: EXPLAIN -SELECT f.key -FROM cbo_t1 f -WHERE (f.key = '1' OR f.key='2') -AND f.key IN ('1', '2') -PREHOOK: type: QUERY -PREHOOK: Input: default@cbo_t1 -PREHOOK: Input: default@cbo_t1@dt=2014 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT f.key -FROM cbo_t1 f -WHERE (f.key = '1' OR f.key='2') -AND f.key IN ('1', '2') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@cbo_t1 -POSTHOOK: Input: default@cbo_t1@dt=2014 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - 
Processor Tree: - TableScan - alias: f - filterExpr: (key) IN ('1', '2') (type: boolean) - Filter Operator - predicate: (key) IN ('1', '2') (type: boolean) - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: EXPLAIN -SELECT f.key -FROM cbo_t1 f -WHERE (f.key = '1' OR f.key = '2') -AND f.key IN ('1', '2', '3') -PREHOOK: type: QUERY -PREHOOK: Input: default@cbo_t1 -PREHOOK: Input: default@cbo_t1@dt=2014 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT f.key -FROM cbo_t1 f -WHERE (f.key = '1' OR f.key = '2') -AND f.key IN ('1', '2', '3') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@cbo_t1 -POSTHOOK: Input: default@cbo_t1@dt=2014 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: f - filterExpr: (key) IN ('1', '2') (type: boolean) - Filter Operator - predicate: (key) IN ('1', '2') (type: boolean) - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: EXPLAIN -SELECT f.key -FROM cbo_t1 f -WHERE (f.key = '1' OR f.key='2' OR f.key='3') -AND f.key IN ('1', '2') -PREHOOK: type: QUERY -PREHOOK: Input: default@cbo_t1 -PREHOOK: Input: default@cbo_t1@dt=2014 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT f.key -FROM cbo_t1 f -WHERE (f.key = '1' OR f.key='2' OR f.key='3') -AND f.key IN ('1', '2') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@cbo_t1 -POSTHOOK: Input: default@cbo_t1@dt=2014 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: f - filterExpr: (key) IN ('1', '2') (type: boolean) - Filter Operator - predicate: (key) IN ('1', '2') (type: boolean) - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - ListSink - diff 
--git a/ql/src/test/results/clientpositive/llap/flatten_and_or.q.out b/ql/src/test/results/clientpositive/llap/flatten_and_or.q.out deleted file mode 100644 index 421bbf6efa..0000000000 --- a/ql/src/test/results/clientpositive/llap/flatten_and_or.q.out +++ /dev/null @@ -1,56 +0,0 @@ -PREHOOK: query: explain -SELECT key -FROM src -WHERE - ((key = '0' - AND value = '8') OR (key = '1' - AND value = '5') OR (key = '2' - AND value = '6') OR (key = '3' - AND value = '8') OR (key = '4' - AND value = '1') OR (key = '5' - AND value = '6') OR (key = '6' - AND value = '1') OR (key = '7' - AND value = '1') OR (key = '8' - AND value = '1') OR (key = '9' - AND value = '1') OR (key = '10' - AND value = '3')) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT key -FROM src -WHERE - ((key = '0' - AND value = '8') OR (key = '1' - AND value = '5') OR (key = '2' - AND value = '6') OR (key = '3' - AND value = '8') OR (key = '4' - AND value = '1') OR (key = '5' - AND value = '6') OR (key = '6' - AND value = '1') OR (key = '7' - AND value = '1') OR (key = '8' - AND value = '1') OR (key = '9' - AND value = '1') OR (key = '10' - AND value = '3')) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - filterExpr: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean) - Filter Operator - predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = 
'6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean) - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/fold_case.q.out b/ql/src/test/results/clientpositive/llap/fold_case.q.out deleted file mode 100644 index b748a46cfe..0000000000 --- a/ql/src/test/results/clientpositive/llap/fold_case.q.out +++ /dev/null @@ -1,629 +0,0 @@ -PREHOOK: query: explain -select count(1) from src where (case key when '238' then true else false end) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select count(1) from src where (case key when '238' then true else false end) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (key = '238') (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = '238') (type: boolean) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - 
sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select count(1) from src where (case key when '238' then 1=2 else 1=1 end) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select count(1) from src where (case key when '238' then 1=2 else 1=1 end) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (key = '238') is not true (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = '238') is not true (type: boolean) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator 
- aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select count(1) from src where (case key when '238' then 1=2 else 1=31 end) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select count(1) from src where (case key when '238' then 1=2 else 1=31 end) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number 
of rows: 0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select count(1) from src where (case key when '238' then true else 1=1 end) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select count(1) from src where (case key when '238' then true else 1=1 end) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select count(1) from src where (case key when '238' then 1=1 else 1=null end) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select count(1) from src where (case key when '238' 
then 1=1 else 1=null end) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (key = '238') (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = '238') (type: boolean) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select count(1) from src where (case key when 
'238' then 1=null end) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select count(1) from src where (case key when '238' then 1=null end) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - 
Processor Tree: - ListSink - -PREHOOK: query: explain -select count(1) from src where (case key when '238' then 2 = cast('2' as bigint) end) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select count(1) from src where (case key when '238' then 2 = cast('2' as bigint) end) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (key = '238') (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = '238') (type: boolean) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select (case key when '238' then null else false end) from src where (case key when '238' then 2 = cast('1' as bigint) else true end) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select (case key when '238' then null else false end) from src where (case key when '238' then 2 = cast('1' as bigint) else true end) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - filterExpr: (key = '238') is not true (type: boolean) - Filter Operator - predicate: (key = '238') is not true (type: boolean) - Select Operator - expressions: ((key = '238') is true and null) (type: boolean) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain -select (case key when '238' then null else null end) from src where (case key when '238' then 2 = null else 3 = null end) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select (case key when '238' then null else null end) from src where (case key when '238' then 2 = null else 3 = null end) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - Select Operator - Limit - Number of rows: 0 - Select Operator - expressions: null (type: void) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain -select count(1) from src where (case key 
when '238' then null else 1=1 end) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select count(1) from src where (case key when '238' then null else 1=1 end) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (key = '238') is not true (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = '238') is not true (type: boolean) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select (CASE WHEN (-2) >= 0 THEN SUBSTRING(key, 1,CAST((-2) AS INT)) ELSE NULL END) -from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select (CASE WHEN (-2) >= 0 THEN SUBSTRING(key, 1,CAST((-2) AS INT)) ELSE NULL END) -from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - Select Operator - expressions: null (type: string) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain -select (CASE WHEN key = value THEN '1' WHEN true THEN '0' ELSE NULL END) -from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select (CASE WHEN key = value THEN '1' WHEN true THEN '0' ELSE NULL END) -from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - Select Operator - expressions: CASE WHEN ((key = value)) THEN ('1') ELSE ('0') END (type: string) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain -select (case when true then key when not true then to_date(key) else null end) from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select (case when true then key when not true then to_date(key) else null end) from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root 
stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/fold_eq_with_case_when.q.out b/ql/src/test/results/clientpositive/llap/fold_eq_with_case_when.q.out deleted file mode 100644 index c6d0a6faa5..0000000000 --- a/ql/src/test/results/clientpositive/llap/fold_eq_with_case_when.q.out +++ /dev/null @@ -1,200 +0,0 @@ -PREHOOK: query: explain -SELECT -SUM((CASE WHEN 1000000 = 0 THEN NULL ELSE l_partkey / 1000000 END)), -SUM(1) AS `sum_number_of_records_ok` FROM lineitem -WHERE -(((CASE WHEN ('N' = l_returnflag) THEN 1 ELSE 1 END) = 1) AND -((CASE WHEN ('MAIL' = l_shipmode) THEN 1 ELSE 1 END) = 1) AND -((CASE WHEN ('O' = l_linestatus) THEN 1 ELSE 1 END) = 1) AND -((CASE WHEN ('NONE' = l_shipinstruct) THEN 1 ELSE 1 END) = 1) AND -((CASE WHEN ('All' = (CASE WHEN (l_shipmode = 'TRUCK') THEN 'East' WHEN (l_shipmode = 'MAIL') THEN 'West' WHEN (l_shipmode = 'REG AIR') THEN 'BizDev' ELSE 'Other' END)) THEN 1 ELSE 1 END) = 1) AND -((CASE WHEN ('AIR' = l_shipmode) THEN 1 ELSE 1 END) = 1) AND -((CASE WHEN ('1996-03-30' = TO_DATE(l_shipdate)) THEN 1 ELSE NULL END) = 1) AND -((CASE WHEN ('RAIL' = l_shipmode) THEN 1 ELSE NULL END) = 1) AND (1 = 1) AND -((CASE WHEN (1 = l_linenumber) THEN 1 ELSE 1 END) = 1) AND (1 = 1)) -GROUP BY l_orderkey -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT -SUM((CASE WHEN 1000000 = 0 THEN NULL ELSE l_partkey / 1000000 END)), -SUM(1) AS `sum_number_of_records_ok` FROM lineitem -WHERE -(((CASE WHEN ('N' = l_returnflag) THEN 1 ELSE 1 END) = 1) AND -((CASE WHEN ('MAIL' = l_shipmode) THEN 1 ELSE 1 END) = 1) AND -((CASE WHEN ('O' = l_linestatus) THEN 1 ELSE 1 END) = 1) AND -((CASE WHEN ('NONE' = l_shipinstruct) THEN 1 ELSE 1 END) = 1) AND -((CASE WHEN ('All' = (CASE WHEN (l_shipmode 
= 'TRUCK') THEN 'East' WHEN (l_shipmode = 'MAIL') THEN 'West' WHEN (l_shipmode = 'REG AIR') THEN 'BizDev' ELSE 'Other' END)) THEN 1 ELSE 1 END) = 1) AND -((CASE WHEN ('AIR' = l_shipmode) THEN 1 ELSE 1 END) = 1) AND -((CASE WHEN ('1996-03-30' = TO_DATE(l_shipdate)) THEN 1 ELSE NULL END) = 1) AND -((CASE WHEN ('RAIL' = l_shipmode) THEN 1 ELSE NULL END) = 1) AND (1 = 1) AND -((CASE WHEN (1 = l_linenumber) THEN 1 ELSE 1 END) = 1) AND (1 = 1)) -GROUP BY l_orderkey -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: lineitem - filterExpr: ((l_shipmode = 'RAIL') and (DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP)))) (type: boolean) - Statistics: Num rows: 100 Data size: 19000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((l_shipmode = 'RAIL') and (DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP)))) (type: boolean) - Statistics: Num rows: 7 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: l_orderkey (type: int), (UDFToDouble(l_partkey) / 1000000.0D) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1), sum(1) - keys: _col0 (type: int) - minReductionHashAggr: 0.57142854 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE 
Column stats: COMPLETE - value expressions: _col1 (type: double), _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: double), _col2 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select key from src where (case key when '238' then 1 else 2 end) = 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select key from src where (case key when '238' then 1 else 2 end) = 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - filterExpr: (key = '238') (type: boolean) - Filter Operator - predicate: (key = '238') (type: boolean) - Select Operator - expressions: '238' (type: string) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain select key from src where (case key when '238' then 1 when '94' then 1 else 3 end) = cast('1' as int) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was 
here #### -POSTHOOK: query: explain select key from src where (case key when '238' then 1 when '94' then 1 else 3 end) = cast('1' as int) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - filterExpr: (key) IN ('238', '94') (type: boolean) - Filter Operator - predicate: (key) IN ('238', '94') (type: boolean) - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain select key from src where (case key when '238' then 1 else 2 end) = (case when key != '238' then 1 else 1 end) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select key from src where (case key when '238' then 1 else 2 end) = (case when key != '238' then 1 else 1 end) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - filterExpr: (key = '238') (type: boolean) - Filter Operator - predicate: (key = '238') (type: boolean) - Select Operator - expressions: '238' (type: string) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: explain select key from src where (case key when '238' then 1 end) = (case when key != '238' then 1 when key = '23' then 1 end) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain select key from src where (case key when '238' then 1 end) = (case when key != '238' then 1 when key = '23' then 1 end) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 0 - Processor Tree: - 
ListSink - diff --git a/ql/src/test/results/clientpositive/llap/fold_to_null.q.out b/ql/src/test/results/clientpositive/llap/fold_to_null.q.out deleted file mode 100644 index d6749dace2..0000000000 --- a/ql/src/test/results/clientpositive/llap/fold_to_null.q.out +++ /dev/null @@ -1,283 +0,0 @@ -PREHOOK: query: create table t (a int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t -POSTHOOK: query: create table t (a int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t -PREHOOK: query: create table t2 (b int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t2 -POSTHOOK: query: create table t2 (b int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t2 -PREHOOK: query: create table t3 (c int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t3 -POSTHOOK: query: create table t3 (c int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t3 -PREHOOK: query: insert into t values(3),(10) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values(3),(10) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.a SCRIPT [] -Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -PREHOOK: query: explain select a from t,t2,t3 where - (a>3 and null between 0 and 10) is null -PREHOOK: type: QUERY -PREHOOK: Input: default@t -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: explain select a from t,t2,t3 where - (a>3 and null between 0 and 10) is null -POSTHOOK: type: QUERY 
-POSTHOOK: Input: default@t -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) - Reducer 3 <- Map 5 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t - filterExpr: ((a > 3) and null) is null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((a > 3) and null) is null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: t3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 5 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 6 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 6 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain select a from t,t2,t3 where - (a>5 or null between 0 and 10) and (a*a < 101) - and t.a=t2.b and t.a=t3.c -PREHOOK: type: QUERY -PREHOOK: Input: default@t -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: explain select a from t,t2,t3 where - (a>5 or null between 0 and 10) and (a*a < 101) - and t.a=t2.b and t.a=t3.c -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was 
here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t - filterExpr: ((a > 5) and ((a * a) < 101)) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((a > 5) and ((a * a) < 101)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: t2 - filterExpr: ((b > 5) and ((b * b) < 101)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((b > 5) and ((b * b) < 101)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: b (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: t3 - filterExpr: ((c > 5) and ((c * c) < 101)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((c > 5) and ((c * c) < 101)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c 
(type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - diff --git a/ql/src/test/results/clientpositive/llap/folder_predicate.q.out b/ql/src/test/results/clientpositive/llap/folder_predicate.q.out deleted file mode 100644 index fbca1437c3..0000000000 --- a/ql/src/test/results/clientpositive/llap/folder_predicate.q.out +++ /dev/null @@ -1,298 +0,0 @@ -PREHOOK: query: drop table if exists predicate_fold_tb -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop 
table if exists predicate_fold_tb -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table predicate_fold_tb(value int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@predicate_fold_tb -POSTHOOK: query: create table predicate_fold_tb(value int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@predicate_fold_tb -PREHOOK: query: insert into predicate_fold_tb values(NULL), (1), (2), (3), (4), (5) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@predicate_fold_tb -POSTHOOK: query: insert into predicate_fold_tb values(NULL), (1), (2), (3), (4), (5) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@predicate_fold_tb -POSTHOOK: Lineage: predicate_fold_tb.value SCRIPT [] -PREHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value = 3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value = 3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: predicate_fold_tb - filterExpr: ((value <> 3) or value is null) (type: boolean) - Filter Operator - predicate: ((value <> 3) or value is null) (type: boolean) - Select Operator - expressions: value (type: int) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value = 3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value = 3) -POSTHOOK: type: QUERY -POSTHOOK: Input: 
default@predicate_fold_tb -#### A masked pattern was here #### -NULL -1 -2 -4 -5 -PREHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value >= 3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value >= 3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: predicate_fold_tb - filterExpr: (value is null or (value < 3)) (type: boolean) - Filter Operator - predicate: (value is null or (value < 3)) (type: boolean) - Select Operator - expressions: value (type: int) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value >= 3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value >= 3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -NULL -1 -2 -PREHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <= 3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <= 3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: predicate_fold_tb - filterExpr: (value is null or (value > 3)) (type: boolean) - Filter Operator - predicate: (value is null or (value > 3)) (type: 
boolean) - Select Operator - expressions: value (type: int) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <= 3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <= 3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -NULL -4 -5 -PREHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: predicate_fold_tb - filterExpr: (value is null or (value <= 3)) (type: boolean) - Filter Operator - predicate: (value is null or (value <= 3)) (type: boolean) - Select Operator - expressions: value (type: int) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -NULL -1 -2 -3 -PREHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value < 3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS 
NOT NULL AND value < 3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: predicate_fold_tb - filterExpr: (value is null or (value >= 3)) (type: boolean) - Filter Operator - predicate: (value is null or (value >= 3)) (type: boolean) - Select Operator - expressions: value (type: int) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value < 3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value < 3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -NULL -3 -4 -5 -PREHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <> 3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <> 3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: predicate_fold_tb - filterExpr: (value is null or (value = 3)) (type: boolean) - Filter Operator - predicate: (value is null or (value = 3)) (type: boolean) - Select Operator - expressions: value (type: int) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value <> 3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value 
IS NOT NULL AND value <> 3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -NULL -3 -PREHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 1 AND value <=3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: explain -SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 1 AND value <=3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: predicate_fold_tb - filterExpr: (value is null or (value <= 1) or (value > 3)) (type: boolean) - Filter Operator - predicate: (value is null or (value <= 1) or (value > 3)) (type: boolean) - Select Operator - expressions: value (type: int) - outputColumnNames: _col0 - ListSink - -PREHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 1 AND value <=3) -PREHOOK: type: QUERY -PREHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM predicate_fold_tb WHERE not(value IS NOT NULL AND value > 1 AND value <=3) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@predicate_fold_tb -#### A masked pattern was here #### -NULL -1 -4 -5 diff --git a/ql/src/test/results/clientpositive/llap/fp_literal_arithmetic.q.out b/ql/src/test/results/clientpositive/llap/fp_literal_arithmetic.q.out deleted file mode 100644 index 01e69e5a2a..0000000000 --- a/ql/src/test/results/clientpositive/llap/fp_literal_arithmetic.q.out +++ /dev/null @@ -1,406 +0,0 @@ -PREHOOK: query: explain -select sum(l_extendedprice) from lineitem q0 where l_discount -between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here 
#### -POSTHOOK: query: explain -select sum(l_extendedprice) from lineitem q0 where l_discount -between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: q0 - filterExpr: l_discount BETWEEN 0.05D AND 0.07D (type: boolean) - Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: l_discount BETWEEN 0.05D AND 0.07D (type: boolean) - Statistics: Num rows: 35 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: l_extendedprice (type: double) - outputColumnNames: l_extendedprice - Statistics: Num rows: 35 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(l_extendedprice) - minReductionHashAggr: 0.9714286 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(l_extendedprice) from lineitem q0 where l_discount -between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: select sum(l_extendedprice) from lineitem q0 where l_discount -between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -1201714.81 -PREHOOK: query: explain -select sum(l_extendedprice) from lineitem q1 where l_discount -between 0.06 - 0.01 and 0.06 + 0.01 -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: explain -select sum(l_extendedprice) from lineitem q1 where l_discount -between 0.06 - 0.01 and 0.06 + 0.01 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: q1 - filterExpr: l_discount BETWEEN 0.05D AND 0.07D (type: boolean) - Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: l_discount BETWEEN 0.05D AND 0.07D (type: boolean) - Statistics: Num rows: 35 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: l_extendedprice (type: double) - outputColumnNames: l_extendedprice - Statistics: Num rows: 35 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - 
aggregations: sum(l_extendedprice) - minReductionHashAggr: 0.9714286 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(l_extendedprice) from lineitem q1 where l_discount -between 0.06 - 0.01 and 0.06 + 0.01 -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: select sum(l_extendedprice) from lineitem q1 where l_discount -between 0.06 - 0.01 and 0.06 + 0.01 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -1201714.81 -PREHOOK: query: select sum(l_extendedprice) from lineitem q2 where l_discount -between 0.05 and 0.07 -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: select sum(l_extendedprice) from lineitem q2 where l_discount -between 0.05 and 0.07 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -1201714.81 -PREHOOK: query: select sum(l_extendedprice) from 
lineitem q3 where l_discount -between (cast('0.06' as decimal(3,2)) - cast('0.01' as decimal(3,2))) - and (cast('0.06' as decimal(3,2)) + cast('0.01' as decimal(3,2))) -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: select sum(l_extendedprice) from lineitem q3 where l_discount -between (cast('0.06' as decimal(3,2)) - cast('0.01' as decimal(3,2))) - and (cast('0.06' as decimal(3,2)) + cast('0.01' as decimal(3,2))) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -1201714.81 -PREHOOK: query: select sum(l_extendedprice) from lineitem q4 where l_discount -between (cast('0.06' as decimal(3,2)) - cast('0.01' as double)) - and (cast('0.06' as decimal(3,2)) + cast('0.01' as double)) -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: select sum(l_extendedprice) from lineitem q4 where l_discount -between (cast('0.06' as decimal(3,2)) - cast('0.01' as double)) - and (cast('0.06' as decimal(3,2)) + cast('0.01' as double)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -960361.91 -PREHOOK: query: explain -select sum(l_extendedprice) from lineitem q10 where l_discount -between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: explain -select sum(l_extendedprice) from lineitem q10 where l_discount -between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - 
TableScan - alias: q10 - filterExpr: l_discount BETWEEN CAST( '0.05' AS decimal(3,2)) AND CAST( '0.07' AS decimal(3,2)) (type: boolean) - Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: l_discount BETWEEN CAST( '0.05' AS decimal(3,2)) AND CAST( '0.07' AS decimal(3,2)) (type: boolean) - Statistics: Num rows: 11 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: l_extendedprice (type: double) - outputColumnNames: l_extendedprice - Statistics: Num rows: 11 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(l_extendedprice) - minReductionHashAggr: 0.9090909 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(l_extendedprice) from lineitem q10 where l_discount -between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### 
-POSTHOOK: query: select sum(l_extendedprice) from lineitem q10 where l_discount -between cast('0.05' as decimal(3,2)) and cast('0.07' as decimal(3,2)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -1201714.81 -PREHOOK: query: explain -select sum(l_extendedprice) from lineitem q11 where l_discount -between 0.06 - 0.01 and 0.06 + 0.01 -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: explain -select sum(l_extendedprice) from lineitem q11 where l_discount -between 0.06 - 0.01 and 0.06 + 0.01 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: q11 - filterExpr: l_discount BETWEEN (0.06 - 0.01) AND (0.06 + 0.01) (type: boolean) - Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: l_discount BETWEEN (0.06 - 0.01) AND (0.06 + 0.01) (type: boolean) - Statistics: Num rows: 11 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: l_extendedprice (type: double) - outputColumnNames: l_extendedprice - Statistics: Num rows: 11 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(l_extendedprice) - minReductionHashAggr: 0.9090909 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double) - Execution mode: vectorized, llap - LLAP 
IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select sum(l_extendedprice) from lineitem q11 where l_discount -between 0.06 - 0.01 and 0.06 + 0.01 -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: select sum(l_extendedprice) from lineitem q11 where l_discount -between 0.06 - 0.01 and 0.06 + 0.01 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -1201714.81 -PREHOOK: query: select sum(l_extendedprice) from lineitem q12 where l_discount -between 0.05 and 0.07 -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: select sum(l_extendedprice) from lineitem q12 where l_discount -between 0.05 and 0.07 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -1201714.81 -PREHOOK: query: select sum(l_extendedprice) from lineitem q13 where l_discount -between (cast('0.06' as decimal(3,2)) - cast('0.01' as decimal(3,2))) - and (cast('0.06' as decimal(3,2)) + cast('0.01' as decimal(3,2))) -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: select sum(l_extendedprice) from lineitem q13 where l_discount -between (cast('0.06' as decimal(3,2)) - cast('0.01' as decimal(3,2))) - and 
(cast('0.06' as decimal(3,2)) + cast('0.01' as decimal(3,2))) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -1201714.81 -PREHOOK: query: select sum(l_extendedprice) from lineitem q14 where l_discount -between (cast('0.06' as decimal(3,2)) - cast('0.01' as double)) - and (cast('0.06' as decimal(3,2)) + cast('0.01' as double)) -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: select sum(l_extendedprice) from lineitem q14 where l_discount -between (cast('0.06' as decimal(3,2)) - cast('0.01' as double)) - and (cast('0.06' as decimal(3,2)) + cast('0.01' as double)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -960361.91 diff --git a/ql/src/test/results/clientpositive/llap/gby_star.q.out b/ql/src/test/results/clientpositive/llap/gby_star.q.out deleted file mode 100644 index 9bc78d888d..0000000000 --- a/ql/src/test/results/clientpositive/llap/gby_star.q.out +++ /dev/null @@ -1,499 +0,0 @@ -PREHOOK: query: explain -select *, sum(key) from src group by key, value order by key, value limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select *, sum(key) from src group by key, value order by key, value limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: key (type: string), value (type: string) - null sort order: zz - Statistics: 
Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(key) - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: double) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1860 Basic stats: 
COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1860 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select *, sum(key) from src group by key, value order by key, value limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select *, sum(key) from src group by key, value order by key, value limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 0.0 -10 val_10 10.0 -100 val_100 200.0 -103 val_103 206.0 -104 val_104 208.0 -105 val_105 105.0 -11 val_11 11.0 -111 val_111 111.0 -113 val_113 226.0 -114 val_114 114.0 -PREHOOK: query: explain -select *, sum(key) from src where key < 100 group by key, value order by key, value limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select *, sum(key) from src where key < 100 group by key, value order by key, value limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num 
rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: key (type: string), value (type: string) - null sort order: zz - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: sum(key) - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: double) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1860 Basic stats: COMPLETE 
Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1860 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select *, sum(key) from src where key < 100 group by key, value order by key, value limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select *, sum(key) from src where key < 100 group by key, value order by key, value limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 0.0 -10 val_10 10.0 -11 val_11 11.0 -12 val_12 24.0 -15 val_15 30.0 -17 val_17 17.0 -18 val_18 36.0 -19 val_19 19.0 -2 val_2 2.0 -20 val_20 20.0 -PREHOOK: query: explain -select *, sum(key) from (select key from src where key < 100) a group by key order by key limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select *, sum(key) from (select key from src where key < 100) a group by key order by key limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 100.0D) (type: boolean) - 
Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) - null sort order: z - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: sum(key) - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select *, sum(key) from (select key from src where key < 100) a group by key order by key limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select *, sum(key) from (select key from src where key < 100) a group by key order by key limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 0.0 -10 10.0 -11 11.0 -12 24.0 -15 30.0 -17 17.0 -18 36.0 -19 19.0 -2 2.0 -20 20.0 -PREHOOK: query: explain -select a.*, sum(src.key) from (select key from src where key < 100) a -inner join src on a.key = src.key group by a.key order by a.key limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain -select a.*, sum(src.key) from (select key from src where key < 100) a -inner join src on a.key = src.key group by a.key order by a.key limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - 
Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: src - filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (UDFToDouble(key) < 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 28884 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 166 Data size: 28884 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic 
stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double) - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select a.*, sum(src.key) from (select key from src where key < 100) a -inner join src on a.key = src.key group by a.key order by a.key limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: 
query: select a.*, sum(src.key) from (select key from src where key < 100) a -inner join src on a.key = src.key group by a.key order by a.key limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 0.0 -10 10.0 -11 11.0 -12 48.0 -15 60.0 -17 17.0 -18 72.0 -19 19.0 -2 2.0 -20 20.0 diff --git a/ql/src/test/results/clientpositive/llap/groupby10.q.out b/ql/src/test/results/clientpositive/llap/groupby10.q.out deleted file mode 100644 index d92bcb3d80..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby10.q.out +++ /dev/null @@ -1,952 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n0(key INT, val1 INT, val2 INT) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n0 -POSTHOOK: query: CREATE TABLE dest1_n0(key INT, val1 INT, val2 INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n0 -PREHOOK: query: CREATE TABLE dest2(key INT, val1 INT, val2 INT) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest2 -POSTHOOK: query: CREATE TABLE dest2(key INT, val1 INT, val2 INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest2 -PREHOOK: query: CREATE TABLE INPUT(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@INPUT -POSTHOOK: query: CREATE TABLE INPUT(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@INPUT -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' INTO TABLE INPUT -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@input -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' INTO TABLE INPUT -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@input -PREHOOK: query: EXPLAIN -FROM INPUT 
-INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -PREHOOK: type: QUERY -PREHOOK: Input: default@input -PREHOOK: Output: default@dest1_n0 -PREHOOK: Output: default@dest2 -POSTHOOK: query: EXPLAIN -FROM INPUT -INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@input -POSTHOOK: Output: default@dest1_n0 -POSTHOOK: Output: default@dest2 -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 1 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: input - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), substr(value, 5) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition 
columns: key (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), substr(value, 5) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: int) - mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: int) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n0 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - outputColumnNames: key, val1, val2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val1 (type: int), val2 (type: int) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') - mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:0._col0) - keys: 
KEY._col0 (type: int) - mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: int) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - outputColumnNames: key, val1, val2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val1 (type: int), val2 (type: int) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), 
compute_stats(VALUE._col3, 'hll') - mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n0 - - Stage: Stage-4 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, val1, val2 - Column Types: int, int, int - Table: default.dest1_n0 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, val1, val2 - Column Types: int, int, int - Table: default.dest2 - -PREHOOK: query: 
FROM INPUT -INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -PREHOOK: type: QUERY -PREHOOK: Input: default@input -PREHOOK: Output: default@dest1_n0 -PREHOOK: Output: default@dest2 -POSTHOOK: query: FROM INPUT -INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@input -POSTHOOK: Output: default@dest1_n0 -POSTHOOK: Output: default@dest2 -POSTHOOK: Lineage: dest1_n0.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: dest1_n0.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: dest1_n0.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: SELECT * from dest1_n0 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n0 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * from dest1_n0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n0 -#### A masked pattern was here #### -128 1 1 -150 1 1 -165 1 1 -193 1 1 -213 3 2 -224 1 1 -238 3 3 -255 1 1 -265 1 1 -27 1 1 -273 1 1 -278 1 1 -311 1 1 -369 1 1 -401 1 1 -409 1 1 -484 1 1 -66 1 1 -86 1 1 -98 1 1 -PREHOOK: query: SELECT * from dest2 -PREHOOK: type: QUERY -PREHOOK: Input: 
default@dest2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * from dest2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2 -#### A masked pattern was here #### -128 128 128 -150 150 150 -165 165 165 -193 193 193 -213 640 427 -224 224 224 -238 717 717 -255 255 255 -265 265 265 -27 27 27 -273 273 273 -278 278 278 -311 311 311 -369 369 369 -401 401 401 -409 409 409 -484 484 484 -66 66 66 -86 86 86 -98 98 98 -PREHOOK: query: EXPLAIN -FROM INPUT -INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -PREHOOK: type: QUERY -PREHOOK: Input: default@input -PREHOOK: Output: default@dest1_n0 -PREHOOK: Output: default@dest2 -POSTHOOK: query: EXPLAIN -FROM INPUT -INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@input -POSTHOOK: Output: default@dest1_n0 -POSTHOOK: Output: default@dest2 -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 1 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map 
Operator Tree: - TableScan - alias: input - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), substr(value, 5) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), substr(value, 5) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: int) - mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: int) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - 
Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n0 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - outputColumnNames: key, val1, val2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val1 (type: int), val2 (type: int) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') - mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - 
Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: int) - mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: int) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - outputColumnNames: key, val1, val2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE 
- Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val1 (type: int), val2 (type: int) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') - mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n0 - - Stage: Stage-4 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, val1, val2 - Column Types: int, int, int - Table: default.dest1_n0 - - Stage: Stage-1 - Move 
Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, val1, val2 - Column Types: int, int, int - Table: default.dest2 - -PREHOOK: query: FROM INPUT -INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -PREHOOK: type: QUERY -PREHOOK: Input: default@input -PREHOOK: Output: default@dest1_n0 -PREHOOK: Output: default@dest2 -POSTHOOK: query: FROM INPUT -INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@input -POSTHOOK: Output: default@dest1_n0 -POSTHOOK: Output: default@dest2 -POSTHOOK: Lineage: dest1_n0.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: dest1_n0.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: dest1_n0.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: SELECT * from dest1_n0 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n0 
-#### A masked pattern was here #### -POSTHOOK: query: SELECT * from dest1_n0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n0 -#### A masked pattern was here #### -128 1 1 -150 1 1 -165 1 1 -193 1 1 -213 3 2 -224 1 1 -238 3 3 -255 1 1 -265 1 1 -27 1 1 -273 1 1 -278 1 1 -311 1 1 -369 1 1 -401 1 1 -409 1 1 -484 1 1 -66 1 1 -86 1 1 -98 1 1 -PREHOOK: query: SELECT * from dest2 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * from dest2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2 -#### A masked pattern was here #### -128 128 128 -150 150 150 -165 165 165 -193 193 193 -213 640 427 -224 224 224 -238 717 717 -255 255 255 -265 265 265 -27 27 27 -273 273 273 -278 278 278 -311 311 311 -369 369 369 -401 401 401 -409 409 409 -484 484 484 -66 66 66 -86 86 86 -98 98 98 -PREHOOK: query: EXPLAIN -FROM INPUT -INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -PREHOOK: type: QUERY -PREHOOK: Input: default@input -PREHOOK: Output: default@dest1_n0 -PREHOOK: Output: default@dest2 -POSTHOOK: query: EXPLAIN -FROM INPUT -INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@input -POSTHOOK: Output: default@dest1_n0 -POSTHOOK: Output: default@dest2 -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - -STAGE PLANS: - Stage: 
Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: input - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), substr(value, 5) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Forward - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) - keys: KEY._col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n0 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - 
outputColumnNames: key, val1, val2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val1 (type: int), val2 (type: int) - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) - keys: KEY._col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - outputColumnNames: key, val1, val2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val1 (type: int), val2 (type: int) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - 
Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n0 - - Stage: Stage-4 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, val1, val2 - Column Types: int, int, int - Table: default.dest1_n0 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, val1, val2 - Column Types: int, int, int - Table: default.dest2 - -PREHOOK: query: FROM INPUT -INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, sum(distinct 
substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -PREHOOK: type: QUERY -PREHOOK: Input: default@input -PREHOOK: Output: default@dest1_n0 -PREHOOK: Output: default@dest2 -POSTHOOK: query: FROM INPUT -INSERT OVERWRITE TABLE dest1_n0 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@input -POSTHOOK: Output: default@dest1_n0 -POSTHOOK: Output: default@dest2 -POSTHOOK: Lineage: dest1_n0.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: dest1_n0.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: dest1_n0.val2 EXPRESSION [(input)input.null, ] -POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.null, ] -PREHOOK: query: SELECT * from dest1_n0 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n0 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * from dest1_n0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n0 -#### A masked pattern was here #### -128 128 1 -150 150 1 -165 165 1 -193 193 1 -213 427 2 -224 224 1 -238 717 3 -255 255 1 -265 265 1 -27 27 1 -273 273 1 -278 278 1 -311 311 1 -369 369 1 -401 401 1 -409 409 1 -484 484 1 -66 66 1 -86 86 1 -98 98 1 -PREHOOK: query: SELECT * from dest2 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * from dest2 -POSTHOOK: type: 
QUERY -POSTHOOK: Input: default@dest2 -#### A masked pattern was here #### -128 128 128 -150 150 150 -165 165 165 -193 193 193 -213 427 213 -224 224 224 -238 717 239 -255 255 255 -265 265 265 -27 27 27 -273 273 273 -278 278 278 -311 311 311 -369 369 369 -401 401 401 -409 409 409 -484 484 484 -66 66 66 -86 86 86 -98 98 98 diff --git a/ql/src/test/results/clientpositive/llap/groupby11.q.out b/ql/src/test/results/clientpositive/llap/groupby11.q.out deleted file mode 100644 index 01c23f715e..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby11.q.out +++ /dev/null @@ -1,960 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n137(key STRING, val1 INT, val2 INT) partitioned by (ds string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n137 -POSTHOOK: query: CREATE TABLE dest1_n137(key STRING, val1 INT, val2 INT) partitioned by (ds string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n137 -PREHOOK: query: CREATE TABLE dest2_n36(key STRING, val1 INT, val2 INT) partitioned by (ds string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest2_n36 -POSTHOOK: query: CREATE TABLE dest2_n36(key STRING, val1 INT, val2 INT) partitioned by (ds string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest2_n36 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n137 partition(ds='111') - SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value -INSERT OVERWRITE TABLE dest2_n36 partition(ds='111') - SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n137@ds=111 -PREHOOK: Output: default@dest2_n36@ds=111 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n137 partition(ds='111') - SELECT src.value, count(src.key), 
count(distinct src.key) GROUP BY src.value -INSERT OVERWRITE TABLE dest2_n36 partition(ds='111') - SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n137@ds=111 -POSTHOOK: Output: default@dest2_n36@ds=111 -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Map 1 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: value (type: string), key (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: substr(value, 5) (type: string), key (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: substr(value, 5) 
(type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 53500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 53500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 307 Data size: 32849 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 307 Data size: 30393 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 307 Data size: 30393 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n137 - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) - outputColumnNames: key, val1, val2 - Statistics: Num rows: 307 Data size: 57102 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: '111' 
(type: string) - null sort order: z - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 307 Data size: 57102 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), val1 (type: int), val2 (type: int) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') - keys: '111' (type: string) - mode: partial1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: '111' (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: '111' (type: string) - Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - keys: '111' (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - 
Group By Operator - aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: partial1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 307 Data size: 31314 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 307 Data size: 28858 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 307 Data size: 28858 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n36 - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) - outputColumnNames: key, val1, val2 - Statistics: Num rows: 307 Data size: 55567 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: '111' (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 307 Data size: 55567 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key 
(type: string), val1 (type: int), val2 (type: int) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') - keys: '111' (type: string) - mode: partial1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: '111' (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: '111' (type: string) - Statistics: Num rows: 307 Data size: 422125 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - keys: '111' (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1407 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 111 - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n137 - - Stage: Stage-4 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, val1, val2 - Column Types: string, int, int - Table: default.dest1_n137 - - Stage: Stage-1 - Move Operator - tables: - partition: - ds 111 - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n36 - - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, val1, val2 - Column Types: string, int, int - Table: default.dest2_n36 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n137 partition(ds='111') - SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value -INSERT OVERWRITE TABLE dest2_n36 partition(ds='111') - SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n137@ds=111 -PREHOOK: Output: default@dest2_n36@ds=111 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n137 partition(ds='111') - SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value -INSERT OVERWRITE TABLE dest2_n36 partition(ds='111') - SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n137@ds=111 -POSTHOOK: Output: default@dest2_n36@ds=111 -POSTHOOK: Lineage: dest1_n137 PARTITION(ds=111).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n137 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n137 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] -POSTHOOK: Lineage: dest2_n36 PARTITION(ds=111).key EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2_n36 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2_n36 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT * from dest1_n137 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n137 -PREHOOK: Input: default@dest1_n137@ds=111 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * from dest1_n137 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n137 -POSTHOOK: Input: default@dest1_n137@ds=111 -#### A masked pattern was here #### -val_0 3 1 111 -val_10 1 1 111 -val_100 2 1 111 -val_103 2 1 111 -val_104 2 1 111 -val_105 1 1 111 -val_11 1 1 111 -val_111 1 1 111 -val_113 2 1 111 -val_114 1 1 111 -val_116 1 1 111 -val_118 2 1 111 -val_119 3 1 111 -val_12 2 1 111 -val_120 2 1 111 -val_125 2 1 111 -val_126 1 1 111 -val_128 3 1 111 -val_129 2 1 111 -val_131 1 1 111 -val_133 1 1 111 -val_134 2 1 111 -val_136 1 1 111 -val_137 2 1 111 -val_138 4 1 111 -val_143 1 1 111 -val_145 1 1 111 -val_146 2 1 111 -val_149 2 1 111 -val_15 2 1 111 -val_150 1 1 111 -val_152 2 1 111 -val_153 1 1 111 -val_155 1 1 111 -val_156 1 1 111 -val_157 1 1 111 -val_158 1 1 111 -val_160 1 1 111 -val_162 1 1 111 -val_163 1 1 111 -val_164 2 1 111 -val_165 2 1 111 -val_166 1 1 111 -val_167 3 1 111 -val_168 1 1 111 -val_169 4 1 111 -val_17 1 1 111 -val_170 1 1 111 -val_172 2 1 111 -val_174 2 1 111 -val_175 2 1 111 -val_176 2 1 111 -val_177 1 1 111 -val_178 1 1 111 -val_179 2 1 111 -val_18 2 1 111 -val_180 1 1 111 -val_181 1 1 111 -val_183 1 1 111 -val_186 1 1 111 -val_187 3 1 111 -val_189 1 1 111 -val_19 1 1 111 -val_190 1 1 111 -val_191 2 1 111 -val_192 1 1 111 -val_193 3 1 111 -val_194 1 1 111 -val_195 2 1 111 -val_196 1 1 111 -val_197 2 1 111 -val_199 3 1 111 -val_2 1 1 
111 -val_20 1 1 111 -val_200 2 1 111 -val_201 1 1 111 -val_202 1 1 111 -val_203 2 1 111 -val_205 2 1 111 -val_207 2 1 111 -val_208 3 1 111 -val_209 2 1 111 -val_213 2 1 111 -val_214 1 1 111 -val_216 2 1 111 -val_217 2 1 111 -val_218 1 1 111 -val_219 2 1 111 -val_221 2 1 111 -val_222 1 1 111 -val_223 2 1 111 -val_224 2 1 111 -val_226 1 1 111 -val_228 1 1 111 -val_229 2 1 111 -val_230 5 1 111 -val_233 2 1 111 -val_235 1 1 111 -val_237 2 1 111 -val_238 2 1 111 -val_239 2 1 111 -val_24 2 1 111 -val_241 1 1 111 -val_242 2 1 111 -val_244 1 1 111 -val_247 1 1 111 -val_248 1 1 111 -val_249 1 1 111 -val_252 1 1 111 -val_255 2 1 111 -val_256 2 1 111 -val_257 1 1 111 -val_258 1 1 111 -val_26 2 1 111 -val_260 1 1 111 -val_262 1 1 111 -val_263 1 1 111 -val_265 2 1 111 -val_266 1 1 111 -val_27 1 1 111 -val_272 2 1 111 -val_273 3 1 111 -val_274 1 1 111 -val_275 1 1 111 -val_277 4 1 111 -val_278 2 1 111 -val_28 1 1 111 -val_280 2 1 111 -val_281 2 1 111 -val_282 2 1 111 -val_283 1 1 111 -val_284 1 1 111 -val_285 1 1 111 -val_286 1 1 111 -val_287 1 1 111 -val_288 2 1 111 -val_289 1 1 111 -val_291 1 1 111 -val_292 1 1 111 -val_296 1 1 111 -val_298 3 1 111 -val_30 1 1 111 -val_302 1 1 111 -val_305 1 1 111 -val_306 1 1 111 -val_307 2 1 111 -val_308 1 1 111 -val_309 2 1 111 -val_310 1 1 111 -val_311 3 1 111 -val_315 1 1 111 -val_316 3 1 111 -val_317 2 1 111 -val_318 3 1 111 -val_321 2 1 111 -val_322 2 1 111 -val_323 1 1 111 -val_325 2 1 111 -val_327 3 1 111 -val_33 1 1 111 -val_331 2 1 111 -val_332 1 1 111 -val_333 2 1 111 -val_335 1 1 111 -val_336 1 1 111 -val_338 1 1 111 -val_339 1 1 111 -val_34 1 1 111 -val_341 1 1 111 -val_342 2 1 111 -val_344 2 1 111 -val_345 1 1 111 -val_348 5 1 111 -val_35 3 1 111 -val_351 1 1 111 -val_353 2 1 111 -val_356 1 1 111 -val_360 1 1 111 -val_362 1 1 111 -val_364 1 1 111 -val_365 1 1 111 -val_366 1 1 111 -val_367 2 1 111 -val_368 1 1 111 -val_369 3 1 111 -val_37 2 1 111 -val_373 1 1 111 -val_374 1 1 111 -val_375 1 1 111 -val_377 1 1 111 -val_378 1 1 111 
-val_379 1 1 111 -val_382 2 1 111 -val_384 3 1 111 -val_386 1 1 111 -val_389 1 1 111 -val_392 1 1 111 -val_393 1 1 111 -val_394 1 1 111 -val_395 2 1 111 -val_396 3 1 111 -val_397 2 1 111 -val_399 2 1 111 -val_4 1 1 111 -val_400 1 1 111 -val_401 5 1 111 -val_402 1 1 111 -val_403 3 1 111 -val_404 2 1 111 -val_406 4 1 111 -val_407 1 1 111 -val_409 3 1 111 -val_41 1 1 111 -val_411 1 1 111 -val_413 2 1 111 -val_414 2 1 111 -val_417 3 1 111 -val_418 1 1 111 -val_419 1 1 111 -val_42 2 1 111 -val_421 1 1 111 -val_424 2 1 111 -val_427 1 1 111 -val_429 2 1 111 -val_43 1 1 111 -val_430 3 1 111 -val_431 3 1 111 -val_432 1 1 111 -val_435 1 1 111 -val_436 1 1 111 -val_437 1 1 111 -val_438 3 1 111 -val_439 2 1 111 -val_44 1 1 111 -val_443 1 1 111 -val_444 1 1 111 -val_446 1 1 111 -val_448 1 1 111 -val_449 1 1 111 -val_452 1 1 111 -val_453 1 1 111 -val_454 3 1 111 -val_455 1 1 111 -val_457 1 1 111 -val_458 2 1 111 -val_459 2 1 111 -val_460 1 1 111 -val_462 2 1 111 -val_463 2 1 111 -val_466 3 1 111 -val_467 1 1 111 -val_468 4 1 111 -val_469 5 1 111 -val_47 1 1 111 -val_470 1 1 111 -val_472 1 1 111 -val_475 1 1 111 -val_477 1 1 111 -val_478 2 1 111 -val_479 1 1 111 -val_480 3 1 111 -val_481 1 1 111 -val_482 1 1 111 -val_483 1 1 111 -val_484 1 1 111 -val_485 1 1 111 -val_487 1 1 111 -val_489 4 1 111 -val_490 1 1 111 -val_491 1 1 111 -val_492 2 1 111 -val_493 1 1 111 -val_494 1 1 111 -val_495 1 1 111 -val_496 1 1 111 -val_497 1 1 111 -val_498 3 1 111 -val_5 3 1 111 -val_51 2 1 111 -val_53 1 1 111 -val_54 1 1 111 -val_57 1 1 111 -val_58 2 1 111 -val_64 1 1 111 -val_65 1 1 111 -val_66 1 1 111 -val_67 2 1 111 -val_69 1 1 111 -val_70 3 1 111 -val_72 2 1 111 -val_74 1 1 111 -val_76 2 1 111 -val_77 1 1 111 -val_78 1 1 111 -val_8 1 1 111 -val_80 1 1 111 -val_82 1 1 111 -val_83 2 1 111 -val_84 2 1 111 -val_85 1 1 111 -val_86 1 1 111 -val_87 1 1 111 -val_9 1 1 111 -val_90 3 1 111 -val_92 1 1 111 -val_95 2 1 111 -val_96 1 1 111 -val_97 2 1 111 -val_98 2 1 111 -PREHOOK: query: SELECT * from 
dest2_n36 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2_n36 -PREHOOK: Input: default@dest2_n36@ds=111 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * from dest2_n36 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2_n36 -POSTHOOK: Input: default@dest2_n36@ds=111 -#### A masked pattern was here #### -0 3 1 111 -10 1 1 111 -100 2 1 111 -103 2 1 111 -104 2 1 111 -105 1 1 111 -11 1 1 111 -111 1 1 111 -113 2 1 111 -114 1 1 111 -116 1 1 111 -118 2 1 111 -119 3 1 111 -12 2 1 111 -120 2 1 111 -125 2 1 111 -126 1 1 111 -128 3 1 111 -129 2 1 111 -131 1 1 111 -133 1 1 111 -134 2 1 111 -136 1 1 111 -137 2 1 111 -138 4 1 111 -143 1 1 111 -145 1 1 111 -146 2 1 111 -149 2 1 111 -15 2 1 111 -150 1 1 111 -152 2 1 111 -153 1 1 111 -155 1 1 111 -156 1 1 111 -157 1 1 111 -158 1 1 111 -160 1 1 111 -162 1 1 111 -163 1 1 111 -164 2 1 111 -165 2 1 111 -166 1 1 111 -167 3 1 111 -168 1 1 111 -169 4 1 111 -17 1 1 111 -170 1 1 111 -172 2 1 111 -174 2 1 111 -175 2 1 111 -176 2 1 111 -177 1 1 111 -178 1 1 111 -179 2 1 111 -18 2 1 111 -180 1 1 111 -181 1 1 111 -183 1 1 111 -186 1 1 111 -187 3 1 111 -189 1 1 111 -19 1 1 111 -190 1 1 111 -191 2 1 111 -192 1 1 111 -193 3 1 111 -194 1 1 111 -195 2 1 111 -196 1 1 111 -197 2 1 111 -199 3 1 111 -2 1 1 111 -20 1 1 111 -200 2 1 111 -201 1 1 111 -202 1 1 111 -203 2 1 111 -205 2 1 111 -207 2 1 111 -208 3 1 111 -209 2 1 111 -213 2 1 111 -214 1 1 111 -216 2 1 111 -217 2 1 111 -218 1 1 111 -219 2 1 111 -221 2 1 111 -222 1 1 111 -223 2 1 111 -224 2 1 111 -226 1 1 111 -228 1 1 111 -229 2 1 111 -230 5 1 111 -233 2 1 111 -235 1 1 111 -237 2 1 111 -238 2 1 111 -239 2 1 111 -24 2 1 111 -241 1 1 111 -242 2 1 111 -244 1 1 111 -247 1 1 111 -248 1 1 111 -249 1 1 111 -252 1 1 111 -255 2 1 111 -256 2 1 111 -257 1 1 111 -258 1 1 111 -26 2 1 111 -260 1 1 111 -262 1 1 111 -263 1 1 111 -265 2 1 111 -266 1 1 111 -27 1 1 111 -272 2 1 111 -273 3 1 111 -274 1 1 111 -275 1 1 111 -277 4 1 111 -278 2 1 111 -28 1 1 111 -280 2 1 111 -281 2 1 111 -282 2 1 
111 -283 1 1 111 -284 1 1 111 -285 1 1 111 -286 1 1 111 -287 1 1 111 -288 2 1 111 -289 1 1 111 -291 1 1 111 -292 1 1 111 -296 1 1 111 -298 3 1 111 -30 1 1 111 -302 1 1 111 -305 1 1 111 -306 1 1 111 -307 2 1 111 -308 1 1 111 -309 2 1 111 -310 1 1 111 -311 3 1 111 -315 1 1 111 -316 3 1 111 -317 2 1 111 -318 3 1 111 -321 2 1 111 -322 2 1 111 -323 1 1 111 -325 2 1 111 -327 3 1 111 -33 1 1 111 -331 2 1 111 -332 1 1 111 -333 2 1 111 -335 1 1 111 -336 1 1 111 -338 1 1 111 -339 1 1 111 -34 1 1 111 -341 1 1 111 -342 2 1 111 -344 2 1 111 -345 1 1 111 -348 5 1 111 -35 3 1 111 -351 1 1 111 -353 2 1 111 -356 1 1 111 -360 1 1 111 -362 1 1 111 -364 1 1 111 -365 1 1 111 -366 1 1 111 -367 2 1 111 -368 1 1 111 -369 3 1 111 -37 2 1 111 -373 1 1 111 -374 1 1 111 -375 1 1 111 -377 1 1 111 -378 1 1 111 -379 1 1 111 -382 2 1 111 -384 3 1 111 -386 1 1 111 -389 1 1 111 -392 1 1 111 -393 1 1 111 -394 1 1 111 -395 2 1 111 -396 3 1 111 -397 2 1 111 -399 2 1 111 -4 1 1 111 -400 1 1 111 -401 5 1 111 -402 1 1 111 -403 3 1 111 -404 2 1 111 -406 4 1 111 -407 1 1 111 -409 3 1 111 -41 1 1 111 -411 1 1 111 -413 2 1 111 -414 2 1 111 -417 3 1 111 -418 1 1 111 -419 1 1 111 -42 2 1 111 -421 1 1 111 -424 2 1 111 -427 1 1 111 -429 2 1 111 -43 1 1 111 -430 3 1 111 -431 3 1 111 -432 1 1 111 -435 1 1 111 -436 1 1 111 -437 1 1 111 -438 3 1 111 -439 2 1 111 -44 1 1 111 -443 1 1 111 -444 1 1 111 -446 1 1 111 -448 1 1 111 -449 1 1 111 -452 1 1 111 -453 1 1 111 -454 3 1 111 -455 1 1 111 -457 1 1 111 -458 2 1 111 -459 2 1 111 -460 1 1 111 -462 2 1 111 -463 2 1 111 -466 3 1 111 -467 1 1 111 -468 4 1 111 -469 5 1 111 -47 1 1 111 -470 1 1 111 -472 1 1 111 -475 1 1 111 -477 1 1 111 -478 2 1 111 -479 1 1 111 -480 3 1 111 -481 1 1 111 -482 1 1 111 -483 1 1 111 -484 1 1 111 -485 1 1 111 -487 1 1 111 -489 4 1 111 -490 1 1 111 -491 1 1 111 -492 2 1 111 -493 1 1 111 -494 1 1 111 -495 1 1 111 -496 1 1 111 -497 1 1 111 -498 3 1 111 -5 3 1 111 -51 2 1 111 -53 1 1 111 -54 1 1 111 -57 1 1 111 -58 2 1 111 -64 1 1 111 -65 1 1 111 
-66 1 1 111 -67 2 1 111 -69 1 1 111 -70 3 1 111 -72 2 1 111 -74 1 1 111 -76 2 1 111 -77 1 1 111 -78 1 1 111 -8 1 1 111 -80 1 1 111 -82 1 1 111 -83 2 1 111 -84 2 1 111 -85 1 1 111 -86 1 1 111 -87 1 1 111 -9 1 1 111 -90 3 1 111 -92 1 1 111 -95 2 1 111 -96 1 1 111 -97 2 1 111 -98 2 1 111 diff --git a/ql/src/test/results/clientpositive/llap/groupby12.q.out b/ql/src/test/results/clientpositive/llap/groupby12.q.out deleted file mode 100644 index add92ca3af..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby12.q.out +++ /dev/null @@ -1,448 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n106(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n106 -POSTHOOK: query: CREATE TABLE dest1_n106(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n106 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n106 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n106 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n106 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n106 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(KEY._col0), count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 316 Data size: 32548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col1) (type: int), CAST( _col2 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n106 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: 
COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n106 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest1_n106 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n106 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n106 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n106 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n106 -POSTHOOK: Lineage: dest1_n106.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n106.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n106.* FROM dest1_n106 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n106 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n106.* FROM dest1_n106 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n106 -#### A masked pattern was here #### -3 1 -1 1 -2 1 -2 1 -2 1 -1 1 -1 1 -1 1 -2 1 -1 1 -1 1 -2 1 -3 1 -2 1 -2 1 -2 1 -1 1 -3 1 -2 1 -1 1 -1 1 -2 1 -1 1 -2 1 -4 1 -1 1 -1 1 -2 1 -2 1 -2 1 -1 1 -2 1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 
1 -1 1 -2 1 -2 1 -1 1 -3 1 -1 1 -4 1 -1 1 -1 1 -2 1 -2 1 -2 1 -2 1 -1 1 -1 1 -2 1 -2 1 -1 1 -1 1 -1 1 -1 1 -3 1 -1 1 -1 1 -1 1 -2 1 -1 1 -3 1 -1 1 -2 1 -1 1 -2 1 -3 1 -1 1 -1 1 -2 1 -1 1 -1 1 -2 1 -2 1 -2 1 -3 1 -2 1 -2 1 -1 1 -2 1 -2 1 -1 1 -2 1 -2 1 -1 1 -2 1 -2 1 -1 1 -1 1 -2 1 -5 1 -2 1 -1 1 -2 1 -2 1 -2 1 -2 1 -1 1 -2 1 -1 1 -1 1 -1 1 -1 1 -1 1 -2 1 -2 1 -1 1 -1 1 -2 1 -1 1 -1 1 -1 1 -2 1 -1 1 -1 1 -2 1 -3 1 -1 1 -1 1 -4 1 -2 1 -1 1 -2 1 -2 1 -2 1 -1 1 -1 1 -1 1 -1 1 -1 1 -2 1 -1 1 -1 1 -1 1 -1 1 -3 1 -1 1 -1 1 -1 1 -1 1 -2 1 -1 1 -2 1 -1 1 -3 1 -1 1 -3 1 -2 1 -3 1 -2 1 -2 1 -1 1 -2 1 -3 1 -1 1 -2 1 -1 1 -2 1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 -2 1 -2 1 -1 1 -5 1 -3 1 -1 1 -2 1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 -2 1 -1 1 -3 1 -2 1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 -2 1 -3 1 -1 1 -1 1 -1 1 -1 1 -1 1 -2 1 -3 1 -2 1 -2 1 -1 1 -1 1 -5 1 -1 1 -3 1 -2 1 -4 1 -1 1 -3 1 -1 1 -1 1 -2 1 -2 1 -3 1 -1 1 -1 1 -2 1 -1 1 -2 1 -1 1 -2 1 -1 1 -3 1 -3 1 -1 1 -1 1 -1 1 -1 1 -3 1 -2 1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 -3 1 -1 1 -1 1 -2 1 -2 1 -1 1 -2 1 -2 1 -3 1 -1 1 -4 1 -5 1 -1 1 -1 1 -1 1 -1 1 -1 1 -2 1 -1 1 -3 1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 -4 1 -1 1 -1 1 -2 1 -1 1 -1 1 -1 1 -1 1 -1 1 -3 1 -3 1 -2 1 -1 1 -1 1 -1 1 -2 1 -1 1 -1 1 -1 1 -2 1 -1 1 -3 1 -2 1 -1 1 -2 1 -1 1 -1 1 -1 1 -1 1 -1 1 -2 1 -2 1 -1 1 -1 1 -1 1 -1 1 -3 1 -1 1 -2 1 -1 1 -2 1 -2 1 diff --git a/ql/src/test/results/clientpositive/llap/groupby1_limit.q.out b/ql/src/test/results/clientpositive/llap/groupby1_limit.q.out deleted file mode 100644 index cec9a03e80..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby1_limit.q.out +++ /dev/null @@ -1,146 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n125(key INT, value DOUBLE) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n125 -POSTHOOK: query: CREATE TABLE dest1_n125(key INT, value DOUBLE) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: 
default@dest1_n125 -PREHOOK: query: EXPLAIN -FROM src INSERT OVERWRITE TABLE dest1_n125 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key LIMIT 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n125 -POSTHOOK: query: EXPLAIN -FROM src INSERT OVERWRITE TABLE dest1_n125 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key LIMIT 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n125 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Select Operator - expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - 
Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: double) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n125 - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n125 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - -PREHOOK: query: 
FROM src INSERT OVERWRITE TABLE dest1_n125 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key ORDER BY src.key LIMIT 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n125 -POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n125 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key ORDER BY src.key LIMIT 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n125 -POSTHOOK: Lineage: dest1_n125.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n125.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n125.* FROM dest1_n125 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n125 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n125.* FROM dest1_n125 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n125 -#### A masked pattern was here #### -0 0.0 -10 10.0 -100 200.0 -103 206.0 -104 208.0 diff --git a/ql/src/test/results/clientpositive/llap/groupby2_limit.q.out b/ql/src/test/results/clientpositive/llap/groupby2_limit.q.out deleted file mode 100644 index 6888c09a39..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby2_limit.q.out +++ /dev/null @@ -1,108 +0,0 @@ -PREHOOK: query: EXPLAIN -SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN -SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was 
here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Select Operator - expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE 
Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink - -PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 0.0 -10 10.0 -100 200.0 -103 206.0 -104 208.0 diff --git a/ql/src/test/results/clientpositive/llap/groupby2_map.q.out b/ql/src/test/results/clientpositive/llap/groupby2_map.q.out deleted file mode 100644 index 67684ced33..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby2_map.q.out +++ /dev/null @@ -1,164 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n16(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n16 -POSTHOOK: query: CREATE TABLE dest1_n16(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n16 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n16 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n16 -POSTHOOK: query: EXPLAIN -FROM 
src -INSERT OVERWRITE TABLE dest1_n16 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n16 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT _col1), sum(_col1) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: 
COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n16 - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - outputColumnNames: key, c1, c2 - Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n16 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, c1, c2 - Column Types: string, int, string - Table: default.dest1_n16 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n16 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n16 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n16 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n16 -POSTHOOK: Lineage: dest1_n16.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n16.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n16.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n16.* FROM dest1_n16 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n16 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n16.* FROM dest1_n16 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n16 -#### A masked pattern was here #### -0 1 00.0 -1 71 116414.0 -2 69 225571.0 -3 62 332004.0 -4 74 452763.0 -5 6 5397.0 -6 5 6398.0 -7 6 7735.0 -8 8 8762.0 -9 7 91047.0 diff --git 
a/ql/src/test/results/clientpositive/llap/groupby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby2_map_multi_distinct.q.out deleted file mode 100644 index e9f8ec6224..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby2_map_multi_distinct.q.out +++ /dev/null @@ -1,324 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n38(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n38 -POSTHOOK: query: CREATE TABLE dest1_n38(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n38 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n38 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n38 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 
substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT _col1), sum(_col1), sum(DISTINCT _col1), count(_col2) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: double), _col5 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n38 - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) - outputColumnNames: key, c1, c2, c3, c4 - Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n38 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, c1, c2, c3, c4 - Column Types: string, int, string, int, int - Table: default.dest1_n38 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n38 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n38 -POSTHOOK: Lineage: dest1_n38.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n38.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n38.c3 EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: dest1_n38.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n38.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n38.* FROM dest1_n38 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n38 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n38.* FROM dest1_n38 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n38 -#### A masked pattern was here #### -0 1 00.0 0 3 -1 71 116414.0 10044 115 -2 69 225571.0 15780 111 -3 62 332004.0 20119 99 -4 74 452763.0 30965 124 -5 6 5397.0 278 10 -6 5 6398.0 331 6 -7 6 7735.0 447 10 -8 8 8762.0 
595 10 -9 7 91047.0 577 12 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n38 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n38 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT _col0), sum(_col1), sum(DISTINCT _col1), count(_col2) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - 
Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: double), _col5 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n38 - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) - outputColumnNames: key, c1, c2, c3, c4 - Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort 
order: - sort order: - Statistics: Num rows: 1 Data size: 2152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n38 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, c1, c2, c3, c4 - Column Types: string, int, string, int, int - Table: default.dest1_n38 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n38 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n38 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), 
concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n38 -POSTHOOK: Lineage: dest1_n38.c1 EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: dest1_n38.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n38.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n38.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n38.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n38.* FROM dest1_n38 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n38 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n38.* FROM dest1_n38 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n38 -#### A masked pattern was here #### -0 1 00.0 0 3 -1 1 116414.0 10044 115 -2 1 225571.0 15780 111 -3 1 332004.0 20119 99 -4 1 452763.0 30965 124 -5 1 5397.0 278 10 -6 1 6398.0 331 6 -7 1 7735.0 447 10 -8 1 8762.0 595 10 -9 1 91047.0 577 12 diff --git a/ql/src/test/results/clientpositive/llap/groupby2_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby2_map_skew.q.out deleted file mode 100644 index 9b87d90a7a..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby2_map_skew.q.out +++ /dev/null @@ -1,181 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n10(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n10 -POSTHOOK: query: CREATE TABLE dest1_n10(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n10 -PREHOOK: query: EXPLAIN 
-FROM src -INSERT OVERWRITE TABLE dest1_n10 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n10 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n10 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n10 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT _col1), sum(_col1) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: double) - Execution mode: llap - 
LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1) - keys: KEY._col0 (type: string) - mode: partials - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: double) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n10 - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - outputColumnNames: key, c1, c2 - Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - 
Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n10 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, c1, c2 - Column Types: string, int, string - Table: default.dest1_n10 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n10 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n10 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n10 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY 
-POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n10 -POSTHOOK: Lineage: dest1_n10.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n10.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n10.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n10.* FROM dest1_n10 order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n10 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n10.* FROM dest1_n10 order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n10 -#### A masked pattern was here #### -0 1 00.0 -1 71 116414.0 -2 69 225571.0 -3 62 332004.0 -4 74 452763.0 -5 6 5397.0 -6 5 6398.0 -7 6 7735.0 -8 8 8762.0 -9 7 91047.0 diff --git a/ql/src/test/results/clientpositive/llap/groupby2_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby2_noskew.q.out deleted file mode 100644 index 2e8481d485..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby2_noskew.q.out +++ /dev/null @@ -1,150 +0,0 @@ -PREHOOK: query: CREATE TABLE dest_g2_n1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest_g2_n1 -POSTHOOK: query: CREATE TABLE dest_g2_n1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest_g2_n1 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest_g2_n1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest_g2_n1 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest_g2_n1 SELECT substr(src.key,1,1), 
count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest_g2_n1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 316 Data size: 31916 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE - 
table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g2_n1 - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - outputColumnNames: key, c1, c2 - Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), c1 (type: int), c2 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g2_n1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, c1, c2 - Column Types: string, int, string - Table: default.dest_g2_n1 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest_g2_n1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), 
concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest_g2_n1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest_g2_n1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest_g2_n1 -POSTHOOK: Lineage: dest_g2_n1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_g2_n1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_g2_n1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest_g2_n1.* FROM dest_g2_n1 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_g2_n1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest_g2_n1.* FROM dest_g2_n1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_g2_n1 -#### A masked pattern was here #### -0 1 00.0 -1 71 116414.0 -2 69 225571.0 -3 62 332004.0 -4 74 452763.0 -5 6 5397.0 -6 5 6398.0 -7 6 7735.0 -8 8 8762.0 -9 7 91047.0 diff --git a/ql/src/test/results/clientpositive/llap/groupby2_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby2_noskew_multi_distinct.q.out deleted file mode 100644 index c4fce00adf..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby2_noskew_multi_distinct.q.out +++ /dev/null @@ -1,153 +0,0 @@ -PREHOOK: query: CREATE TABLE dest_g2_n3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest_g2_n3 -POSTHOOK: query: CREATE TABLE dest_g2_n3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: 
database:default -POSTHOOK: Output: default@dest_g2_n3 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest_g2_n3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest_g2_n3 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest_g2_n3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest_g2_n3 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - 
aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g2_n3 - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) - outputColumnNames: key, c1, c2, c3, c4 - Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - 
compressed: false - Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g2_n3 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, c1, c2, c3, c4 - Column Types: string, int, string, int, int - Table: default.dest_g2_n3 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest_g2_n3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest_g2_n3 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest_g2_n3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest_g2_n3 -POSTHOOK: Lineage: dest_g2_n3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_g2_n3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_g2_n3.c3 EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: dest_g2_n3.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] 
-POSTHOOK: Lineage: dest_g2_n3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest_g2_n3.* FROM dest_g2_n3 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_g2_n3 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest_g2_n3.* FROM dest_g2_n3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_g2_n3 -#### A masked pattern was here #### -0 1 00.0 0 3 -1 71 116414.0 10044 115 -2 69 225571.0 15780 111 -3 62 332004.0 20119 99 -4 74 452763.0 30965 124 -5 6 5397.0 278 10 -6 5 6398.0 331 6 -7 6 7735.0 447 10 -8 8 8762.0 595 10 -9 7 91047.0 577 12 diff --git a/ql/src/test/results/clientpositive/llap/groupby3_map.q.out b/ql/src/test/results/clientpositive/llap/groupby3_map.q.out deleted file mode 100644 index 0bef509f54..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby3_map.q.out +++ /dev/null @@ -1,204 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n53(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n53 -POSTHOOK: query: CREATE TABLE dest1_n53(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n53 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n53 SELECT - sum(substr(src.value,5)), - avg(substr(src.value,5)), - avg(DISTINCT substr(src.value,5)), - max(substr(src.value,5)), - min(substr(src.value,5)), - std(substr(src.value,5)), - stddev_samp(substr(src.value,5)), - variance(substr(src.value,5)), - var_samp(substr(src.value,5)) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n53 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n53 SELECT - sum(substr(src.value,5)), - avg(substr(src.value,5)), - 
avg(DISTINCT substr(src.value,5)), - max(substr(src.value,5)), - min(substr(src.value,5)), - std(substr(src.value,5)), - stddev_samp(substr(src.value,5)), - variance(substr(src.value,5)), - var_samp(substr(src.value,5)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n53 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0), count(_col0), sum(DISTINCT _col0), count(DISTINCT _col0), max(_col0), min(_col0), sum(_col2), sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: 
sum(VALUE._col0), count(VALUE._col1), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(VALUE._col4), min(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n53 - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) - outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), 
compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n53 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 - Column Types: double, double, double, double, double, double, double, double, double - Table: default.dest1_n53 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n53 SELECT - sum(substr(src.value,5)), - avg(substr(src.value,5)), - avg(DISTINCT substr(src.value,5)), - max(substr(src.value,5)), - min(substr(src.value,5)), - std(substr(src.value,5)), - stddev_samp(substr(src.value,5)), - variance(substr(src.value,5)), - var_samp(substr(src.value,5)) -PREHOOK: type: QUERY -PREHOOK: Input: default@src 
-PREHOOK: Output: default@dest1_n53 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n53 SELECT - sum(substr(src.value,5)), - avg(substr(src.value,5)), - avg(DISTINCT substr(src.value,5)), - max(substr(src.value,5)), - min(substr(src.value,5)), - std(substr(src.value,5)), - stddev_samp(substr(src.value,5)), - variance(substr(src.value,5)), - var_samp(substr(src.value,5)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n53 -POSTHOOK: Lineage: dest1_n53.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n53.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n53.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] -POSTHOOK: Lineage: dest1_n53.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n53.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n53.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n53.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n53.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n53.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT -c1, -c2, -round(c3, 11) c3, -c4, -c5, -round(c6, 11) c6, -round(c7, 11) c7, -round(c8, 5) c8, -round(c9, 9) c9 -FROM dest1_n53 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n53 -#### A masked pattern was here #### -POSTHOOK: query: SELECT -c1, -c2, -round(c3, 11) c3, -c4, -c5, -round(c6, 11) c6, -round(c7, 11) c7, -round(c8, 5) c8, -round(c9, 9) c9 -FROM dest1_n53 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n53 -#### A masked pattern was here #### -130091.0 260.182 
256.10355987055 98.0 0.0 142.92680950752 143.06995106519 20428.07288 20469.010897796 diff --git a/ql/src/test/results/clientpositive/llap/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby3_map_multi_distinct.q.out deleted file mode 100644 index 2290c22814..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby3_map_multi_distinct.q.out +++ /dev/null @@ -1,194 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n68(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n68 -POSTHOOK: query: CREATE TABLE dest1_n68(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n68 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n68 SELECT - sum(substr(src.value,5)), - avg(substr(src.value,5)), - avg(DISTINCT substr(src.value,5)), - max(substr(src.value,5)), - min(substr(src.value,5)), - std(substr(src.value,5)), - stddev_samp(substr(src.value,5)), - variance(substr(src.value,5)), - var_samp(substr(src.value,5)), - sum(DISTINCT substr(src.value, 5)), - count(DISTINCT substr(src.value, 5)) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n68 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n68 SELECT - sum(substr(src.value,5)), - avg(substr(src.value,5)), - avg(DISTINCT substr(src.value,5)), - max(substr(src.value,5)), - min(substr(src.value,5)), - std(substr(src.value,5)), - stddev_samp(substr(src.value,5)), - variance(substr(src.value,5)), - var_samp(substr(src.value,5)), - sum(DISTINCT substr(src.value, 5)), - count(DISTINCT substr(src.value, 5)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: 
Output: default@dest1_n68 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0), count(_col0), sum(DISTINCT _col0), count(DISTINCT _col0), max(_col0), min(_col0), sum(_col2), sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(VALUE._col4), min(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 
Data size: 416 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n68 - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) - outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') - mode: complete - 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4664 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4664 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4664 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n68 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 - Column Types: double, double, double, double, double, double, double, double, double, double, double - Table: default.dest1_n68 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n68 SELECT - sum(substr(src.value,5)), - avg(substr(src.value,5)), - avg(DISTINCT substr(src.value,5)), - max(substr(src.value,5)), - min(substr(src.value,5)), - std(substr(src.value,5)), - stddev_samp(substr(src.value,5)), - variance(substr(src.value,5)), - var_samp(substr(src.value,5)), - sum(DISTINCT substr(src.value, 5)), - count(DISTINCT substr(src.value, 5)) 
-PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n68 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n68 SELECT - sum(substr(src.value,5)), - avg(substr(src.value,5)), - avg(DISTINCT substr(src.value,5)), - max(substr(src.value,5)), - min(substr(src.value,5)), - std(substr(src.value,5)), - stddev_samp(substr(src.value,5)), - variance(substr(src.value,5)), - var_samp(substr(src.value,5)), - sum(DISTINCT substr(src.value, 5)), - count(DISTINCT substr(src.value, 5)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n68 -POSTHOOK: Lineage: dest1_n68.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n68.c10 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n68.c11 EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: dest1_n68.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n68.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] -POSTHOOK: Lineage: dest1_n68.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n68.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n68.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n68.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n68.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n68.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n68.* FROM dest1_n68 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n68 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n68.* FROM dest1_n68 -POSTHOOK: 
type: QUERY -POSTHOOK: Input: default@dest1_n68 -#### A masked pattern was here #### -130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593 79136.0 309.0 diff --git a/ql/src/test/results/clientpositive/llap/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby3_map_skew.q.out deleted file mode 100644 index 258e54591e..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby3_map_skew.q.out +++ /dev/null @@ -1,201 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n131(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n131 -POSTHOOK: query: CREATE TABLE dest1_n131(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n131 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n131 SELECT - sum(substr(src.value,5)), - avg(substr(src.value,5)), - avg(DISTINCT substr(src.value,5)), - max(substr(src.value,5)), - min(substr(src.value,5)), - std(substr(src.value,5)), - stddev_samp(substr(src.value,5)), - variance(substr(src.value,5)), - var_samp(substr(src.value,5)) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n131 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n131 SELECT - sum(substr(src.value,5)), - avg(substr(src.value,5)), - avg(DISTINCT substr(src.value,5)), - max(substr(src.value,5)), - min(substr(src.value,5)), - std(substr(src.value,5)), - stddev_samp(substr(src.value,5)), - variance(substr(src.value,5)), - var_samp(substr(src.value,5)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n131 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on 
stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: value - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5)) - keys: substr(value, 5) (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 250 Data size: 323500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 323500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) - mode: partials - outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n131 - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) - outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, 
c9 - Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n131 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 - Column Types: double, double, double, double, double, double, double, double, double - Table: default.dest1_n131 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n131 SELECT - sum(substr(src.value,5)), - avg(substr(src.value,5)), - avg(DISTINCT 
substr(src.value,5)), - max(substr(src.value,5)), - min(substr(src.value,5)), - std(substr(src.value,5)), - stddev_samp(substr(src.value,5)), - variance(substr(src.value,5)), - var_samp(substr(src.value,5)) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n131 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n131 SELECT - sum(substr(src.value,5)), - avg(substr(src.value,5)), - avg(DISTINCT substr(src.value,5)), - max(substr(src.value,5)), - min(substr(src.value,5)), - std(substr(src.value,5)), - stddev_samp(substr(src.value,5)), - variance(substr(src.value,5)), - var_samp(substr(src.value,5)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n131 -POSTHOOK: Lineage: dest1_n131.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n131.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n131.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n131.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n131.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n131.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n131.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n131.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n131.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT ROUND(c1, 1), ROUND(c2, 3), ROUND(c3, 5), ROUND(c4, 1), ROUND(c5, 1), ROUND(c6, 5), -ROUND(c7,5), ROUND(c8, 5), ROUND(c9, 5) FROM dest1_n131 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n131 -#### A masked pattern was here 
#### -POSTHOOK: query: SELECT ROUND(c1, 1), ROUND(c2, 3), ROUND(c3, 5), ROUND(c4, 1), ROUND(c5, 1), ROUND(c6, 5), -ROUND(c7,5), ROUND(c8, 5), ROUND(c9, 5) FROM dest1_n131 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n131 -#### A masked pattern was here #### -130091.0 260.182 256.10356 98.0 0.0 142.92681 143.06995 20428.07288 20469.0109 diff --git a/ql/src/test/results/clientpositive/llap/groupby4.q.out b/ql/src/test/results/clientpositive/llap/groupby4.q.out deleted file mode 100644 index c0492e4c33..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby4.q.out +++ /dev/null @@ -1,173 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n168(c1 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n168 -POSTHOOK: query: CREATE TABLE dest1_n168(c1 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n168 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n168 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n168 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n168 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n168 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE 
- Select Operator - expressions: substr(key, 1, 1) (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: partial1 - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0 - Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n168 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: c1 - Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 
(type: string) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') - mode: partial1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n168 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1 - Column Types: string - Table: default.dest1_n168 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n168 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n168 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n168 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n168 -POSTHOOK: Lineage: dest1_n168.c1 EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n168.* FROM dest1_n168 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n168 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n168.* FROM dest1_n168 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n168 -#### A masked pattern was here #### -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 diff --git a/ql/src/test/results/clientpositive/llap/groupby4_map.q.out b/ql/src/test/results/clientpositive/llap/groupby4_map.q.out deleted file mode 100644 index 7d4d7a0524..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby4_map.q.out +++ /dev/null @@ -1,132 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n40(key INT) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n40 -POSTHOOK: query: CREATE TABLE dest1_n40(key INT) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n40 -PREHOOK: query: EXPLAIN -FROM src INSERT OVERWRITE TABLE dest1_n40 SELECT count(1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n40 -POSTHOOK: query: EXPLAIN -FROM src INSERT OVERWRITE TABLE dest1_n40 SELECT count(1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n40 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - 
minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n40 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: key - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll') - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n40 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key - Column Types: int - Table: default.dest1_n40 - -PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n40 SELECT count(1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n40 -POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n40 SELECT count(1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n40 -POSTHOOK: Lineage: dest1_n40.key EXPRESSION [(src)src.null, ] -PREHOOK: query: SELECT dest1_n40.* FROM dest1_n40 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n40 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n40.* FROM dest1_n40 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n40 -#### A masked pattern was here #### -500 diff --git a/ql/src/test/results/clientpositive/llap/groupby4_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby4_map_skew.q.out deleted file mode 100644 index eb53e25441..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby4_map_skew.q.out +++ /dev/null @@ -1,132 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n141(key INT) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n141 -POSTHOOK: query: CREATE TABLE dest1_n141(key INT) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n141 -PREHOOK: query: EXPLAIN -FROM src INSERT OVERWRITE TABLE dest1_n141 SELECT count(1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: 
Output: default@dest1_n141 -POSTHOOK: query: EXPLAIN -FROM src INSERT OVERWRITE TABLE dest1_n141 SELECT count(1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n141 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n141 - 
Select Operator - expressions: _col0 (type: int) - outputColumnNames: key - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll') - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n141 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key - Column Types: int - Table: default.dest1_n141 - -PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n141 SELECT count(1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n141 -POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n141 SELECT count(1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n141 -POSTHOOK: Lineage: dest1_n141.key EXPRESSION [(src)src.null, ] -PREHOOK: query: SELECT dest1_n141.* FROM dest1_n141 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n141 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n141.* FROM dest1_n141 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n141 -#### A masked 
pattern was here #### -500 diff --git a/ql/src/test/results/clientpositive/llap/groupby4_noskew.q.out b/ql/src/test/results/clientpositive/llap/groupby4_noskew.q.out deleted file mode 100644 index 14b97c4733..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby4_noskew.q.out +++ /dev/null @@ -1,143 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n33(c1 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n33 -POSTHOOK: query: CREATE TABLE dest1_n33(c1 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n33 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n33 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n33 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n33 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n33 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(key, 1, 1) (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic 
stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n33 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: c1 - Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n33 - 
- Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1 - Column Types: string - Table: default.dest1_n33 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n33 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n33 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n33 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n33 -POSTHOOK: Lineage: dest1_n33.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n33.* FROM dest1_n33 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n33 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n33.* FROM dest1_n33 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n33 -#### A masked pattern was here #### -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 diff --git a/ql/src/test/results/clientpositive/llap/groupby5.q.out b/ql/src/test/results/clientpositive/llap/groupby5.q.out deleted file mode 100644 index 2405264a37..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby5.q.out +++ /dev/null @@ -1,489 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n36(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n36 -POSTHOOK: query: CREATE TABLE dest1_n36(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n36 -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest1_n36 -SELECT src.key, sum(substr(src.value,5)) -FROM src -GROUP BY src.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n36 -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest1_n36 -SELECT src.key, sum(substr(src.value,5)) -FROM src -GROUP BY src.key -POSTHOOK: type: QUERY -POSTHOOK: 
Input: default@src -POSTHOOK: Output: default@dest1_n36 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n36 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data 
size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n36 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest1_n36 - -PREHOOK: query: INSERT OVERWRITE TABLE dest1_n36 -SELECT src.key, sum(substr(src.value,5)) -FROM src -GROUP BY src.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n36 -POSTHOOK: query: INSERT OVERWRITE TABLE dest1_n36 -SELECT src.key, sum(substr(src.value,5)) -FROM src -GROUP BY src.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n36 -POSTHOOK: Lineage: dest1_n36.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n36.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n36.* FROM dest1_n36 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n36 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n36.* FROM dest1_n36 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n36 -#### A masked pattern was here #### -0 0.0 -10 10.0 -100 200.0 -103 206.0 -104 208.0 -105 105.0 -11 11.0 -111 111.0 -113 226.0 -114 114.0 -116 116.0 -118 236.0 -119 357.0 -12 24.0 -120 240.0 -125 250.0 -126 126.0 -128 384.0 -129 258.0 -131 131.0 -133 133.0 -134 268.0 -136 136.0 -137 274.0 -138 552.0 -143 143.0 -145 145.0 -146 292.0 -149 298.0 -15 
30.0 -150 150.0 -152 304.0 -153 153.0 -155 155.0 -156 156.0 -157 157.0 -158 158.0 -160 160.0 -162 162.0 -163 163.0 -164 328.0 -165 330.0 -166 166.0 -167 501.0 -168 168.0 -169 676.0 -17 17.0 -170 170.0 -172 344.0 -174 348.0 -175 350.0 -176 352.0 -177 177.0 -178 178.0 -179 358.0 -18 36.0 -180 180.0 -181 181.0 -183 183.0 -186 186.0 -187 561.0 -189 189.0 -19 19.0 -190 190.0 -191 382.0 -192 192.0 -193 579.0 -194 194.0 -195 390.0 -196 196.0 -197 394.0 -199 597.0 -2 2.0 -20 20.0 -200 400.0 -201 201.0 -202 202.0 -203 406.0 -205 410.0 -207 414.0 -208 624.0 -209 418.0 -213 426.0 -214 214.0 -216 432.0 -217 434.0 -218 218.0 -219 438.0 -221 442.0 -222 222.0 -223 446.0 -224 448.0 -226 226.0 -228 228.0 -229 458.0 -230 1150.0 -233 466.0 -235 235.0 -237 474.0 -238 476.0 -239 478.0 -24 48.0 -241 241.0 -242 484.0 -244 244.0 -247 247.0 -248 248.0 -249 249.0 -252 252.0 -255 510.0 -256 512.0 -257 257.0 -258 258.0 -26 52.0 -260 260.0 -262 262.0 -263 263.0 -265 530.0 -266 266.0 -27 27.0 -272 544.0 -273 819.0 -274 274.0 -275 275.0 -277 1108.0 -278 556.0 -28 28.0 -280 560.0 -281 562.0 -282 564.0 -283 283.0 -284 284.0 -285 285.0 -286 286.0 -287 287.0 -288 576.0 -289 289.0 -291 291.0 -292 292.0 -296 296.0 -298 894.0 -30 30.0 -302 302.0 -305 305.0 -306 306.0 -307 614.0 -308 308.0 -309 618.0 -310 310.0 -311 933.0 -315 315.0 -316 948.0 -317 634.0 -318 954.0 -321 642.0 -322 644.0 -323 323.0 -325 650.0 -327 981.0 -33 33.0 -331 662.0 -332 332.0 -333 666.0 -335 335.0 -336 336.0 -338 338.0 -339 339.0 -34 34.0 -341 341.0 -342 684.0 -344 688.0 -345 345.0 -348 1740.0 -35 105.0 -351 351.0 -353 706.0 -356 356.0 -360 360.0 -362 362.0 -364 364.0 -365 365.0 -366 366.0 -367 734.0 -368 368.0 -369 1107.0 -37 74.0 -373 373.0 -374 374.0 -375 375.0 -377 377.0 -378 378.0 -379 379.0 -382 764.0 -384 1152.0 -386 386.0 -389 389.0 -392 392.0 -393 393.0 -394 394.0 -395 790.0 -396 1188.0 -397 794.0 -399 798.0 -4 4.0 -400 400.0 -401 2005.0 -402 402.0 -403 1209.0 -404 808.0 -406 1624.0 -407 407.0 -409 1227.0 -41 41.0 -411 
411.0 -413 826.0 -414 828.0 -417 1251.0 -418 418.0 -419 419.0 -42 84.0 -421 421.0 -424 848.0 -427 427.0 -429 858.0 -43 43.0 -430 1290.0 -431 1293.0 -432 432.0 -435 435.0 -436 436.0 -437 437.0 -438 1314.0 -439 878.0 -44 44.0 -443 443.0 -444 444.0 -446 446.0 -448 448.0 -449 449.0 -452 452.0 -453 453.0 -454 1362.0 -455 455.0 -457 457.0 -458 916.0 -459 918.0 -460 460.0 -462 924.0 -463 926.0 -466 1398.0 -467 467.0 -468 1872.0 -469 2345.0 -47 47.0 -470 470.0 -472 472.0 -475 475.0 -477 477.0 -478 956.0 -479 479.0 -480 1440.0 -481 481.0 -482 482.0 -483 483.0 -484 484.0 -485 485.0 -487 487.0 -489 1956.0 -490 490.0 -491 491.0 -492 984.0 -493 493.0 -494 494.0 -495 495.0 -496 496.0 -497 497.0 -498 1494.0 -5 15.0 -51 102.0 -53 53.0 -54 54.0 -57 57.0 -58 116.0 -64 64.0 -65 65.0 -66 66.0 -67 134.0 -69 69.0 -70 210.0 -72 144.0 -74 74.0 -76 152.0 -77 77.0 -78 78.0 -8 8.0 -80 80.0 -82 82.0 -83 166.0 -84 168.0 -85 85.0 -86 86.0 -87 87.0 -9 9.0 -90 270.0 -92 92.0 -95 190.0 -96 96.0 -97 194.0 -98 196.0 diff --git a/ql/src/test/results/clientpositive/llap/groupby5_map.q.out b/ql/src/test/results/clientpositive/llap/groupby5_map.q.out deleted file mode 100644 index ddd0557df2..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby5_map.q.out +++ /dev/null @@ -1,134 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n75(key INT) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n75 -POSTHOOK: query: CREATE TABLE dest1_n75(key INT) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n75 -PREHOOK: query: EXPLAIN -FROM src INSERT OVERWRITE TABLE dest1_n75 SELECT sum(src.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n75 -POSTHOOK: query: EXPLAIN -FROM src INSERT OVERWRITE TABLE dest1_n75 SELECT sum(src.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n75 -STAGE DEPENDENCIES: - 
Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(key) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n75 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: key - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By 
Operator - aggregations: compute_stats(key, 'hll') - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n75 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key - Column Types: int - Table: default.dest1_n75 - -PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n75 SELECT sum(src.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n75 -POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n75 SELECT sum(src.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n75 -POSTHOOK: Lineage: dest1_n75.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n75.* FROM dest1_n75 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n75 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n75.* FROM dest1_n75 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n75 -#### A masked pattern was here #### -130091 diff --git a/ql/src/test/results/clientpositive/llap/groupby5_map_skew.q.out 
b/ql/src/test/results/clientpositive/llap/groupby5_map_skew.q.out deleted file mode 100644 index b6d681b3f1..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby5_map_skew.q.out +++ /dev/null @@ -1,134 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n76(key INT) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n76 -POSTHOOK: query: CREATE TABLE dest1_n76(key INT) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n76 -PREHOOK: query: EXPLAIN -FROM src INSERT OVERWRITE TABLE dest1_n76 SELECT sum(src.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n76 -POSTHOOK: query: EXPLAIN -FROM src INSERT OVERWRITE TABLE dest1_n76 SELECT sum(src.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n76 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(key) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - 
Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n76 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: key - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll') - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n76 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - 
Column Stats Desc: - Columns: key - Column Types: int - Table: default.dest1_n76 - -PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n76 SELECT sum(src.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n76 -POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1_n76 SELECT sum(src.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n76 -POSTHOOK: Lineage: dest1_n76.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n76.* FROM dest1_n76 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n76 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n76.* FROM dest1_n76 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n76 -#### A masked pattern was here #### -130091 diff --git a/ql/src/test/results/clientpositive/llap/groupby6.q.out b/ql/src/test/results/clientpositive/llap/groupby6.q.out deleted file mode 100644 index 0e45e045b3..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby6.q.out +++ /dev/null @@ -1,173 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n92(c1 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n92 -POSTHOOK: query: CREATE TABLE dest1_n92(c1 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n92 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n92 SELECT DISTINCT substr(src.value,5,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n92 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n92 SELECT DISTINCT substr(src.value,5,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n92 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - 
-STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(value, 5, 1) (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: partial1 - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0 - Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n92 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: c1 - Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') - mode: partial1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n92 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1 - Column Types: string - Table: default.dest1_n92 - -PREHOOK: 
query: FROM src -INSERT OVERWRITE TABLE dest1_n92 SELECT DISTINCT substr(src.value,5,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n92 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n92 SELECT DISTINCT substr(src.value,5,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n92 -POSTHOOK: Lineage: dest1_n92.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n92.* FROM dest1_n92 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n92 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n92.* FROM dest1_n92 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n92 -#### A masked pattern was here #### -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 diff --git a/ql/src/test/results/clientpositive/llap/groupby6_map.q.out b/ql/src/test/results/clientpositive/llap/groupby6_map.q.out deleted file mode 100644 index 13e38e6304..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby6_map.q.out +++ /dev/null @@ -1,155 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n19(c1 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n19 -POSTHOOK: query: CREATE TABLE dest1_n19(c1 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n19 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n19 SELECT DISTINCT substr(src.value,5,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n19 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n19 SELECT DISTINCT substr(src.value,5,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n19 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: 
Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(value, 5, 1) (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n19 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: c1 - Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE 
Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n19 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1 - Column Types: string - Table: default.dest1_n19 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n19 SELECT DISTINCT substr(src.value,5,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n19 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n19 SELECT DISTINCT substr(src.value,5,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n19 -POSTHOOK: Lineage: dest1_n19.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n19.* FROM dest1_n19 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n19 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n19.* FROM dest1_n19 -POSTHOOK: type: QUERY -POSTHOOK: Input: 
default@dest1_n19 -#### A masked pattern was here #### -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 diff --git a/ql/src/test/results/clientpositive/llap/groupby6_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby6_map_skew.q.out deleted file mode 100644 index c07bc8ae26..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby6_map_skew.q.out +++ /dev/null @@ -1,170 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n98(c1 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n98 -POSTHOOK: query: CREATE TABLE dest1_n98(c1 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n98 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n98 SELECT DISTINCT substr(src.value,5,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n98 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n98 SELECT DISTINCT substr(src.value,5,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n98 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(value, 5, 1) (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 
250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: partials - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n98 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: c1 - Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: 
COMPLETE - value expressions: _col0 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n98 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1 - Column Types: string - Table: default.dest1_n98 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n98 SELECT DISTINCT substr(src.value,5,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n98 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n98 SELECT DISTINCT substr(src.value,5,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n98 -POSTHOOK: Lineage: dest1_n98.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n98.* FROM dest1_n98 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n98 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n98.* FROM dest1_n98 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n98 -#### A masked pattern was here #### -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 diff --git a/ql/src/test/results/clientpositive/llap/groupby6_noskew.q.out 
b/ql/src/test/results/clientpositive/llap/groupby6_noskew.q.out deleted file mode 100644 index 18e5c756db..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby6_noskew.q.out +++ /dev/null @@ -1,143 +0,0 @@ -PREHOOK: query: CREATE TABLE dest1_n100(c1 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1_n100 -POSTHOOK: query: CREATE TABLE dest1_n100(c1 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1_n100 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n100 SELECT DISTINCT substr(src.value,5,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n100 -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1_n100 SELECT DISTINCT substr(src.value,5,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n100 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: substr(value, 5, 1) (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: 
vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n100 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: c1 - Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: c1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll') - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n100 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: c1 - Column Types: string - Table: 
default.dest1_n100 - -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n100 SELECT DISTINCT substr(src.value,5,1) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n100 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1_n100 SELECT DISTINCT substr(src.value,5,1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n100 -POSTHOOK: Lineage: dest1_n100.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT dest1_n100.* FROM dest1_n100 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n100 -#### A masked pattern was here #### -POSTHOOK: query: SELECT dest1_n100.* FROM dest1_n100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n100 -#### A masked pattern was here #### -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 diff --git a/ql/src/test/results/clientpositive/llap/groupby7_noskew_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/llap/groupby7_noskew_multi_single_reducer.q.out deleted file mode 100644 index 5dc188d761..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby7_noskew_multi_single_reducer.q.out +++ /dev/null @@ -1,293 +0,0 @@ -PREHOOK: query: CREATE TABLE DEST1_n170(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@DEST1_n170 -POSTHOOK: query: CREATE TABLE DEST1_n170(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@DEST1_n170 -PREHOOK: query: CREATE TABLE DEST2_n42(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@DEST2_n42 -POSTHOOK: query: CREATE TABLE DEST2_n42(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@DEST2_n42 -PREHOOK: query: EXPLAIN -FROM SRC -INSERT OVERWRITE TABLE 
DEST1_n170 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 -INSERT OVERWRITE TABLE DEST2_n42 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n170 -PREHOOK: Output: default@dest2_n42 -POSTHOOK: query: EXPLAIN -FROM SRC -INSERT OVERWRITE TABLE DEST1_n170 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 -INSERT OVERWRITE TABLE DEST2_n42 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n170 -POSTHOOK: Output: default@dest2_n42 -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: string) - 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: substr(value, 5) (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double) - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - 
compressed: true - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n170 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: 
COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n42 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n170 - - Stage: Stage-4 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest1_n170 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n42 - - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest2_n42 - -PREHOOK: query: FROM SRC -INSERT OVERWRITE TABLE DEST1_n170 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 -INSERT OVERWRITE TABLE DEST2_n42 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n170 -PREHOOK: Output: default@dest2_n42 -POSTHOOK: query: FROM SRC -INSERT OVERWRITE TABLE DEST1_n170 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 -INSERT OVERWRITE TABLE DEST2_n42 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n170 -POSTHOOK: Output: default@dest2_n42 -POSTHOOK: Lineage: dest1_n170.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n170.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2_n42.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2_n42.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT DEST1_n170.* FROM DEST1_n170 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n170 -#### A masked pattern was here #### -POSTHOOK: query: SELECT DEST1_n170.* FROM DEST1_n170 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n170 -#### A masked pattern was here #### -0 0.0 -10 10.0 -100 200.0 -103 206.0 -104 208.0 -105 105.0 -11 11.0 -111 111.0 -113 226.0 -114 114.0 -PREHOOK: 
query: SELECT DEST2_n42.* FROM DEST2_n42 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2_n42 -#### A masked pattern was here #### -POSTHOOK: query: SELECT DEST2_n42.* FROM DEST2_n42 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2_n42 -#### A masked pattern was here #### -0 0.0 -10 10.0 -100 200.0 -103 206.0 -104 208.0 -105 105.0 -11 11.0 -111 111.0 -113 226.0 -114 114.0 diff --git a/ql/src/test/results/clientpositive/llap/groupby8.q.out b/ql/src/test/results/clientpositive/llap/groupby8.q.out deleted file mode 100644 index a008928c9c..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby8.q.out +++ /dev/null @@ -1,1832 +0,0 @@ -PREHOOK: query: CREATE TABLE DEST1_n71(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@DEST1_n71 -POSTHOOK: query: CREATE TABLE DEST1_n71(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@DEST1_n71 -PREHOOK: query: CREATE TABLE DEST2_n15(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@DEST2_n15 -POSTHOOK: query: CREATE TABLE DEST2_n15(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@DEST2_n15 -PREHOOK: query: EXPLAIN -FROM SRC -INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n71 -PREHOOK: Output: default@dest2_n15 -POSTHOOK: query: EXPLAIN -FROM SRC -INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key 
-POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n71 -POSTHOOK: Output: default@dest2_n15 -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 1 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string), substr(value, 5) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string), substr(value, 5) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: 
llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n71 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: 
compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) 
(type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n15 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n71 - - Stage: Stage-4 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest1_n71 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n15 - - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest2_n15 - -PREHOOK: query: FROM SRC -INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n71 -PREHOOK: Output: default@dest2_n15 -POSTHOOK: query: FROM SRC -INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n71 -POSTHOOK: Output: default@dest2_n15 -POSTHOOK: Lineage: dest1_n71.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n71.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: 
dest2_n15.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2_n15.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT DEST1_n71.* FROM DEST1_n71 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n71 -#### A masked pattern was here #### -POSTHOOK: query: SELECT DEST1_n71.* FROM DEST1_n71 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n71 -#### A masked pattern was here #### -0 1 -10 1 -100 1 -103 1 -104 1 -105 1 -11 1 -111 1 -113 1 -114 1 -116 1 -118 1 -119 1 -12 1 -120 1 -125 1 -126 1 -128 1 -129 1 -131 1 -133 1 -134 1 -136 1 -137 1 -138 1 -143 1 -145 1 -146 1 -149 1 -15 1 -150 1 -152 1 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 1 -165 1 -166 1 -167 1 -168 1 -169 1 -17 1 -170 1 -172 1 -174 1 -175 1 -176 1 -177 1 -178 1 -179 1 -18 1 -180 1 -181 1 -183 1 -186 1 -187 1 -189 1 -19 1 -190 1 -191 1 -192 1 -193 1 -194 1 -195 1 -196 1 -197 1 -199 1 -2 1 -20 1 -200 1 -201 1 -202 1 -203 1 -205 1 -207 1 -208 1 -209 1 -213 1 -214 1 -216 1 -217 1 -218 1 -219 1 -221 1 -222 1 -223 1 -224 1 -226 1 -228 1 -229 1 -230 1 -233 1 -235 1 -237 1 -238 1 -239 1 -24 1 -241 1 -242 1 -244 1 -247 1 -248 1 -249 1 -252 1 -255 1 -256 1 -257 1 -258 1 -26 1 -260 1 -262 1 -263 1 -265 1 -266 1 -27 1 -272 1 -273 1 -274 1 -275 1 -277 1 -278 1 -28 1 -280 1 -281 1 -282 1 -283 1 -284 1 -285 1 -286 1 -287 1 -288 1 -289 1 -291 1 -292 1 -296 1 -298 1 -30 1 -302 1 -305 1 -306 1 -307 1 -308 1 -309 1 -310 1 -311 1 -315 1 -316 1 -317 1 -318 1 -321 1 -322 1 -323 1 -325 1 -327 1 -33 1 -331 1 -332 1 -333 1 -335 1 -336 1 -338 1 -339 1 -34 1 -341 1 -342 1 -344 1 -345 1 -348 1 -35 1 -351 1 -353 1 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 1 -368 1 -369 1 -37 1 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 1 -384 1 -386 1 -389 1 -392 1 -393 1 -394 1 -395 1 -396 1 -397 1 -399 1 -4 1 -400 1 -401 1 -402 1 -403 1 -404 1 -406 1 -407 1 -409 1 -41 1 -411 1 -413 1 -414 1 -417 1 -418 1 
-419 1 -42 1 -421 1 -424 1 -427 1 -429 1 -43 1 -430 1 -431 1 -432 1 -435 1 -436 1 -437 1 -438 1 -439 1 -44 1 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 1 -455 1 -457 1 -458 1 -459 1 -460 1 -462 1 -463 1 -466 1 -467 1 -468 1 -469 1 -47 1 -470 1 -472 1 -475 1 -477 1 -478 1 -479 1 -480 1 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 1 -490 1 -491 1 -492 1 -493 1 -494 1 -495 1 -496 1 -497 1 -498 1 -5 1 -51 1 -53 1 -54 1 -57 1 -58 1 -64 1 -65 1 -66 1 -67 1 -69 1 -70 1 -72 1 -74 1 -76 1 -77 1 -78 1 -8 1 -80 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -9 1 -90 1 -92 1 -95 1 -96 1 -97 1 -98 1 -PREHOOK: query: SELECT DEST2_n15.* FROM DEST2_n15 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2_n15 -#### A masked pattern was here #### -POSTHOOK: query: SELECT DEST2_n15.* FROM DEST2_n15 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2_n15 -#### A masked pattern was here #### -0 1 -10 1 -100 1 -103 1 -104 1 -105 1 -11 1 -111 1 -113 1 -114 1 -116 1 -118 1 -119 1 -12 1 -120 1 -125 1 -126 1 -128 1 -129 1 -131 1 -133 1 -134 1 -136 1 -137 1 -138 1 -143 1 -145 1 -146 1 -149 1 -15 1 -150 1 -152 1 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 1 -165 1 -166 1 -167 1 -168 1 -169 1 -17 1 -170 1 -172 1 -174 1 -175 1 -176 1 -177 1 -178 1 -179 1 -18 1 -180 1 -181 1 -183 1 -186 1 -187 1 -189 1 -19 1 -190 1 -191 1 -192 1 -193 1 -194 1 -195 1 -196 1 -197 1 -199 1 -2 1 -20 1 -200 1 -201 1 -202 1 -203 1 -205 1 -207 1 -208 1 -209 1 -213 1 -214 1 -216 1 -217 1 -218 1 -219 1 -221 1 -222 1 -223 1 -224 1 -226 1 -228 1 -229 1 -230 1 -233 1 -235 1 -237 1 -238 1 -239 1 -24 1 -241 1 -242 1 -244 1 -247 1 -248 1 -249 1 -252 1 -255 1 -256 1 -257 1 -258 1 -26 1 -260 1 -262 1 -263 1 -265 1 -266 1 -27 1 -272 1 -273 1 -274 1 -275 1 -277 1 -278 1 -28 1 -280 1 -281 1 -282 1 -283 1 -284 1 -285 1 -286 1 -287 1 -288 1 -289 1 -291 1 -292 1 -296 1 -298 1 -30 1 -302 1 -305 1 -306 1 -307 1 -308 1 -309 1 -310 1 -311 1 -315 1 -316 1 -317 1 -318 1 -321 1 -322 1 -323 1 -325 1 -327 1 -33 1 -331 1 
-332 1 -333 1 -335 1 -336 1 -338 1 -339 1 -34 1 -341 1 -342 1 -344 1 -345 1 -348 1 -35 1 -351 1 -353 1 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 1 -368 1 -369 1 -37 1 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 1 -384 1 -386 1 -389 1 -392 1 -393 1 -394 1 -395 1 -396 1 -397 1 -399 1 -4 1 -400 1 -401 1 -402 1 -403 1 -404 1 -406 1 -407 1 -409 1 -41 1 -411 1 -413 1 -414 1 -417 1 -418 1 -419 1 -42 1 -421 1 -424 1 -427 1 -429 1 -43 1 -430 1 -431 1 -432 1 -435 1 -436 1 -437 1 -438 1 -439 1 -44 1 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 1 -455 1 -457 1 -458 1 -459 1 -460 1 -462 1 -463 1 -466 1 -467 1 -468 1 -469 1 -47 1 -470 1 -472 1 -475 1 -477 1 -478 1 -479 1 -480 1 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 1 -490 1 -491 1 -492 1 -493 1 -494 1 -495 1 -496 1 -497 1 -498 1 -5 1 -51 1 -53 1 -54 1 -57 1 -58 1 -64 1 -65 1 -66 1 -67 1 -69 1 -70 1 -72 1 -74 1 -76 1 -77 1 -78 1 -8 1 -80 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -9 1 -90 1 -92 1 -95 1 -96 1 -97 1 -98 1 -PREHOOK: query: EXPLAIN -FROM SRC -INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n71 -PREHOOK: Output: default@dest2_n15 -POSTHOOK: query: EXPLAIN -FROM SRC -INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n71 -POSTHOOK: Output: default@dest2_n15 -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Tez 
-#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 1 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string), substr(value, 5) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string), substr(value, 5) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition 
columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n71 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: 
struct) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n15 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 316 Data size: 59408 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: int), value (type: string) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') - mode: partial1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n71 - - Stage: Stage-4 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest1_n71 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n15 - - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest2_n15 - -PREHOOK: query: FROM SRC -INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n71 -PREHOOK: Output: default@dest2_n15 -POSTHOOK: query: FROM SRC -INSERT OVERWRITE TABLE DEST1_n71 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -INSERT OVERWRITE TABLE DEST2_n15 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n71 -POSTHOOK: Output: default@dest2_n15 -POSTHOOK: Lineage: dest1_n71.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1_n71.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2_n15.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2_n15.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT DEST1_n71.* FROM DEST1_n71 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n71 -#### A masked pattern was here #### -POSTHOOK: query: 
SELECT DEST1_n71.* FROM DEST1_n71 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n71 -#### A masked pattern was here #### -0 1 -10 1 -100 1 -103 1 -104 1 -105 1 -11 1 -111 1 -113 1 -114 1 -116 1 -118 1 -119 1 -12 1 -120 1 -125 1 -126 1 -128 1 -129 1 -131 1 -133 1 -134 1 -136 1 -137 1 -138 1 -143 1 -145 1 -146 1 -149 1 -15 1 -150 1 -152 1 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 1 -165 1 -166 1 -167 1 -168 1 -169 1 -17 1 -170 1 -172 1 -174 1 -175 1 -176 1 -177 1 -178 1 -179 1 -18 1 -180 1 -181 1 -183 1 -186 1 -187 1 -189 1 -19 1 -190 1 -191 1 -192 1 -193 1 -194 1 -195 1 -196 1 -197 1 -199 1 -2 1 -20 1 -200 1 -201 1 -202 1 -203 1 -205 1 -207 1 -208 1 -209 1 -213 1 -214 1 -216 1 -217 1 -218 1 -219 1 -221 1 -222 1 -223 1 -224 1 -226 1 -228 1 -229 1 -230 1 -233 1 -235 1 -237 1 -238 1 -239 1 -24 1 -241 1 -242 1 -244 1 -247 1 -248 1 -249 1 -252 1 -255 1 -256 1 -257 1 -258 1 -26 1 -260 1 -262 1 -263 1 -265 1 -266 1 -27 1 -272 1 -273 1 -274 1 -275 1 -277 1 -278 1 -28 1 -280 1 -281 1 -282 1 -283 1 -284 1 -285 1 -286 1 -287 1 -288 1 -289 1 -291 1 -292 1 -296 1 -298 1 -30 1 -302 1 -305 1 -306 1 -307 1 -308 1 -309 1 -310 1 -311 1 -315 1 -316 1 -317 1 -318 1 -321 1 -322 1 -323 1 -325 1 -327 1 -33 1 -331 1 -332 1 -333 1 -335 1 -336 1 -338 1 -339 1 -34 1 -341 1 -342 1 -344 1 -345 1 -348 1 -35 1 -351 1 -353 1 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 1 -368 1 -369 1 -37 1 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 1 -384 1 -386 1 -389 1 -392 1 -393 1 -394 1 -395 1 -396 1 -397 1 -399 1 -4 1 -400 1 -401 1 -402 1 -403 1 -404 1 -406 1 -407 1 -409 1 -41 1 -411 1 -413 1 -414 1 -417 1 -418 1 -419 1 -42 1 -421 1 -424 1 -427 1 -429 1 -43 1 -430 1 -431 1 -432 1 -435 1 -436 1 -437 1 -438 1 -439 1 -44 1 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 1 -455 1 -457 1 -458 1 -459 1 -460 1 -462 1 -463 1 -466 1 -467 1 -468 1 -469 1 -47 1 -470 1 -472 1 -475 1 -477 1 -478 1 -479 1 -480 1 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 1 -490 1 -491 1 -492 1 
-493 1 -494 1 -495 1 -496 1 -497 1 -498 1 -5 1 -51 1 -53 1 -54 1 -57 1 -58 1 -64 1 -65 1 -66 1 -67 1 -69 1 -70 1 -72 1 -74 1 -76 1 -77 1 -78 1 -8 1 -80 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -9 1 -90 1 -92 1 -95 1 -96 1 -97 1 -98 1 -PREHOOK: query: SELECT DEST2_n15.* FROM DEST2_n15 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2_n15 -#### A masked pattern was here #### -POSTHOOK: query: SELECT DEST2_n15.* FROM DEST2_n15 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2_n15 -#### A masked pattern was here #### -0 1 -10 1 -100 1 -103 1 -104 1 -105 1 -11 1 -111 1 -113 1 -114 1 -116 1 -118 1 -119 1 -12 1 -120 1 -125 1 -126 1 -128 1 -129 1 -131 1 -133 1 -134 1 -136 1 -137 1 -138 1 -143 1 -145 1 -146 1 -149 1 -15 1 -150 1 -152 1 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 1 -165 1 -166 1 -167 1 -168 1 -169 1 -17 1 -170 1 -172 1 -174 1 -175 1 -176 1 -177 1 -178 1 -179 1 -18 1 -180 1 -181 1 -183 1 -186 1 -187 1 -189 1 -19 1 -190 1 -191 1 -192 1 -193 1 -194 1 -195 1 -196 1 -197 1 -199 1 -2 1 -20 1 -200 1 -201 1 -202 1 -203 1 -205 1 -207 1 -208 1 -209 1 -213 1 -214 1 -216 1 -217 1 -218 1 -219 1 -221 1 -222 1 -223 1 -224 1 -226 1 -228 1 -229 1 -230 1 -233 1 -235 1 -237 1 -238 1 -239 1 -24 1 -241 1 -242 1 -244 1 -247 1 -248 1 -249 1 -252 1 -255 1 -256 1 -257 1 -258 1 -26 1 -260 1 -262 1 -263 1 -265 1 -266 1 -27 1 -272 1 -273 1 -274 1 -275 1 -277 1 -278 1 -28 1 -280 1 -281 1 -282 1 -283 1 -284 1 -285 1 -286 1 -287 1 -288 1 -289 1 -291 1 -292 1 -296 1 -298 1 -30 1 -302 1 -305 1 -306 1 -307 1 -308 1 -309 1 -310 1 -311 1 -315 1 -316 1 -317 1 -318 1 -321 1 -322 1 -323 1 -325 1 -327 1 -33 1 -331 1 -332 1 -333 1 -335 1 -336 1 -338 1 -339 1 -34 1 -341 1 -342 1 -344 1 -345 1 -348 1 -35 1 -351 1 -353 1 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 1 -368 1 -369 1 -37 1 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 1 -384 1 -386 1 -389 1 -392 1 -393 1 -394 1 -395 1 -396 1 -397 1 -399 1 -4 1 -400 1 -401 1 -402 1 -403 1 -404 1 -406 1 -407 1 -409 1 -41 1 -411 1 
-413 1 -414 1 -417 1 -418 1 -419 1 -42 1 -421 1 -424 1 -427 1 -429 1 -43 1 -430 1 -431 1 -432 1 -435 1 -436 1 -437 1 -438 1 -439 1 -44 1 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 1 -455 1 -457 1 -458 1 -459 1 -460 1 -462 1 -463 1 -466 1 -467 1 -468 1 -469 1 -47 1 -470 1 -472 1 -475 1 -477 1 -478 1 -479 1 -480 1 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 1 -490 1 -491 1 -492 1 -493 1 -494 1 -495 1 -496 1 -497 1 -498 1 -5 1 -51 1 -53 1 -54 1 -57 1 -58 1 -64 1 -65 1 -66 1 -67 1 -69 1 -70 1 -72 1 -74 1 -76 1 -77 1 -78 1 -8 1 -80 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -9 1 -90 1 -92 1 -95 1 -96 1 -97 1 -98 1 diff --git a/ql/src/test/results/clientpositive/llap/groupby8_map_skew.q.out b/ql/src/test/results/clientpositive/llap/groupby8_map_skew.q.out deleted file mode 100644 index 8668593a92..0000000000 --- a/ql/src/test/results/clientpositive/llap/groupby8_map_skew.q.out +++ /dev/null @@ -1,920 +0,0 @@ -PREHOOK: query: CREATE TABLE DEST1_n87(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@DEST1_n87 -POSTHOOK: query: CREATE TABLE DEST1_n87(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@DEST1_n87 -PREHOOK: query: CREATE TABLE DEST2_n22(key INT, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@DEST2_n22 -POSTHOOK: query: CREATE TABLE DEST2_n22(key INT, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@DEST2_n22 -PREHOOK: query: EXPLAIN -FROM SRC -INSERT OVERWRITE TABLE DEST1_n87 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -INSERT OVERWRITE TABLE DEST2_n22 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n87 -PREHOOK: 
Output: default@dest2_n22 -POSTHOOK: query: EXPLAIN -FROM SRC -INSERT OVERWRITE TABLE DEST1_n87 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -INSERT OVERWRITE TABLE DEST2_n22 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n87 -POSTHOOK: Output: default@dest2_n22 -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), substr(value, 5) (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - 
outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), substr(value, 5) (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: partials - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input 
format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n87 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: partials - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic 
stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col1 AS STRING) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n22 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 250 Data size: 47000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: 
COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n87 - - Stage: Stage-4 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest1_n87 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2_n22 - - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: int, string - Table: default.dest2_n22 - -PREHOOK: query: FROM SRC -INSERT OVERWRITE TABLE DEST1_n87 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -INSERT OVERWRITE TABLE DEST2_n22 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1_n87 -PREHOOK: Output: default@dest2_n22 -POSTHOOK: query: FROM SRC -INSERT OVERWRITE TABLE DEST1_n87 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -INSERT OVERWRITE TABLE DEST2_n22 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1_n87 -POSTHOOK: Output: default@dest2_n22 -POSTHOOK: Lineage: dest1_n87.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: 
Lineage: dest1_n87.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2_n22.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2_n22.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT DEST1_n87.* FROM DEST1_n87 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest1_n87 -#### A masked pattern was here #### -POSTHOOK: query: SELECT DEST1_n87.* FROM DEST1_n87 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest1_n87 -#### A masked pattern was here #### -0 1 -10 1 -100 1 -103 1 -104 1 -105 1 -11 1 -111 1 -113 1 -114 1 -116 1 -118 1 -119 1 -12 1 -120 1 -125 1 -126 1 -128 1 -129 1 -131 1 -133 1 -134 1 -136 1 -137 1 -138 1 -143 1 -145 1 -146 1 -149 1 -15 1 -150 1 -152 1 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 1 -165 1 -166 1 -167 1 -168 1 -169 1 -17 1 -170 1 -172 1 -174 1 -175 1 -176 1 -177 1 -178 1 -179 1 -18 1 -180 1 -181 1 -183 1 -186 1 -187 1 -189 1 -19 1 -190 1 -191 1 -192 1 -193 1 -194 1 -195 1 -196 1 -197 1 -199 1 -2 1 -20 1 -200 1 -201 1 -202 1 -203 1 -205 1 -207 1 -208 1 -209 1 -213 1 -214 1 -216 1 -217 1 -218 1 -219 1 -221 1 -222 1 -223 1 -224 1 -226 1 -228 1 -229 1 -230 1 -233 1 -235 1 -237 1 -238 1 -239 1 -24 1 -241 1 -242 1 -244 1 -247 1 -248 1 -249 1 -252 1 -255 1 -256 1 -257 1 -258 1 -26 1 -260 1 -262 1 -263 1 -265 1 -266 1 -27 1 -272 1 -273 1 -274 1 -275 1 -277 1 -278 1 -28 1 -280 1 -281 1 -282 1 -283 1 -284 1 -285 1 -286 1 -287 1 -288 1 -289 1 -291 1 -292 1 -296 1 -298 1 -30 1 -302 1 -305 1 -306 1 -307 1 -308 1 -309 1 -310 1 -311 1 -315 1 -316 1 -317 1 -318 1 -321 1 -322 1 -323 1 -325 1 -327 1 -33 1 -331 1 -332 1 -333 1 -335 1 -336 1 -338 1 -339 1 -34 1 -341 1 -342 1 -344 1 -345 1 -348 1 -35 1 -351 1 -353 1 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 1 -368 1 -369 1 -37 1 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 1 -384 1 -386 1 -389 1 -392 1 -393 1 -394 1 -395 1 
-396 1 -397 1 -399 1 -4 1 -400 1 -401 1 -402 1 -403 1 -404 1 -406 1 -407 1 -409 1 -41 1 -411 1 -413 1 -414 1 -417 1 -418 1 -419 1 -42 1 -421 1 -424 1 -427 1 -429 1 -43 1 -430 1 -431 1 -432 1 -435 1 -436 1 -437 1 -438 1 -439 1 -44 1 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 1 -455 1 -457 1 -458 1 -459 1 -460 1 -462 1 -463 1 -466 1 -467 1 -468 1 -469 1 -47 1 -470 1 -472 1 -475 1 -477 1 -478 1 -479 1 -480 1 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 1 -490 1 -491 1 -492 1 -493 1 -494 1 -495 1 -496 1 -497 1 -498 1 -5 1 -51 1 -53 1 -54 1 -57 1 -58 1 -64 1 -65 1 -66 1 -67 1 -69 1 -70 1 -72 1 -74 1 -76 1 -77 1 -78 1 -8 1 -80 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -9 1 -90 1 -92 1 -95 1 -96 1 -97 1 -98 1 -PREHOOK: query: SELECT DEST2_n22.* FROM DEST2_n22 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest2_n22 -#### A masked pattern was here #### -POSTHOOK: query: SELECT DEST2_n22.* FROM DEST2_n22 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest2_n22 -#### A masked pattern was here #### -0 1 -10 1 -100 1 -103 1 -104 1 -105 1 -11 1 -111 1 -113 1 -114 1 -116 1 -118 1 -119 1 -12 1 -120 1 -125 1 -126 1 -128 1 -129 1 -131 1 -133 1 -134 1 -136 1 -137 1 -138 1 -143 1 -145 1 -146 1 -149 1 -15 1 -150 1 -152 1 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 1 -165 1 -166 1 -167 1 -168 1 -169 1 -17 1 -170 1 -172 1 -174 1 -175 1 -176 1 -177 1 -178 1 -179 1 -18 1 -180 1 -181 1 -183 1 -186 1 -187 1 -189 1 -19 1 -190 1 -191 1 -192 1 -193 1 -194 1 -195 1 -196 1 -197 1 -199 1 -2 1 -20 1 -200 1 -201 1 -202 1 -203 1 -205 1 -207 1 -208 1 -209 1 -213 1 -214 1 -216 1 -217 1 -218 1 -219 1 -221 1 -222 1 -223 1 -224 1 -226 1 -228 1 -229 1 -230 1 -233 1 -235 1 -237 1 -238 1 -239 1 -24 1 -241 1 -242 1 -244 1 -247 1 -248 1 -249 1 -252 1 -255 1 -256 1 -257 1 -258 1 -26 1 -260 1 -262 1 -263 1 -265 1 -266 1 -27 1 -272 1 -273 1 -274 1 -275 1 -277 1 -278 1 -28 1 -280 1 -281 1 -282 1 -283 1 -284 1 -285 1 -286 1 -287 1 -288 1 -289 1 -291 1 -292 1 -296 1 -298 1 -30 1 -302 1 
-305 1 -306 1 -307 1 -308 1 -309 1 -310 1 -311 1 -315 1 -316 1 -317 1 -318 1 -321 1 -322 1 -323 1 -325 1 -327 1 -33 1 -331 1 -332 1 -333 1 -335 1 -336 1 -338 1 -339 1 -34 1 -341 1 -342 1 -344 1 -345 1 -348 1 -35 1 -351 1 -353 1 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 1 -368 1 -369 1 -37 1 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 1 -384 1 -386 1 -389 1 -392 1 -393 1 -394 1 -395 1 -396 1 -397 1 -399 1 -4 1 -400 1 -401 1 -402 1 -403 1 -404 1 -406 1 -407 1 -409 1 -41 1 -411 1 -413 1 -414 1 -417 1 -418 1 -419 1 -42 1 -421 1 -424 1 -427 1 -429 1 -43 1 -430 1 -431 1 -432 1 -435 1 -436 1 -437 1 -438 1 -439 1 -44 1 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 1 -455 1 -457 1 -458 1 -459 1 -460 1 -462 1 -463 1 -466 1 -467 1 -468 1 -469 1 -47 1 -470 1 -472 1 -475 1 -477 1 -478 1 -479 1 -480 1 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 1 -490 1 -491 1 -492 1 -493 1 -494 1 -495 1 -496 1 -497 1 -498 1 -5 1 -51 1 -53 1 -54 1 -57 1 -58 1 -64 1 -65 1 -66 1 -67 1 -69 1 -70 1 -72 1 -74 1 -76 1 -77 1 -78 1 -8 1 -80 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -9 1 -90 1 -92 1 -95 1 -96 1 -97 1 -98 1 diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_partitioned.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_partitioned.q.out index 11f927e3d5..72433bdf0d 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_partitioned.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_partitioned.q.out @@ -87,6 +87,8 @@ POSTHOOK: Input: default@multi_partitioned@p=2 #### A masked pattern was here #### 0 456 2 10 456 2 +97 455 2 +98 455 2 100 457 2 103 457 2 455 97 1 @@ -95,8 +97,6 @@ POSTHOOK: Input: default@multi_partitioned@p=2 456 10 1 457 100 1 457 103 1 -97 455 2 -98 455 2 PREHOOK: query: desc formatted multi_partitioned PREHOOK: type: DESCTABLE PREHOOK: Input: default@multi_partitioned @@ -175,6 +175,8 @@ POSTHOOK: Input: default@multi_partitioned@p=2 #### A masked pattern was here #### 0 456 1 10 456 1 +97 455 1 +98 455 1 
100 457 1 103 457 1 455 97 2 @@ -183,8 +185,6 @@ POSTHOOK: Input: default@multi_partitioned@p=2 456 10 2 457 100 2 457 103 2 -97 455 1 -98 455 1 PREHOOK: query: desc formatted multi_partitioned PREHOOK: type: DESCTABLE PREHOOK: Input: default@multi_partitioned @@ -263,6 +263,8 @@ POSTHOOK: Input: default@multi_partitioned@p=2 #### A masked pattern was here #### 0 456 1 10 456 1 +97 455 1 +98 455 1 100 457 1 103 457 1 455 97 2 @@ -277,8 +279,6 @@ POSTHOOK: Input: default@multi_partitioned@p=2 457 100 2 457 103 2 457 103 2 -97 455 1 -98 455 1 PREHOOK: query: desc formatted multi_partitioned PREHOOK: type: DESCTABLE PREHOOK: Input: default@multi_partitioned @@ -371,6 +371,10 @@ POSTHOOK: Input: default@multi_partitioned@p=457 0 456 1 10 456 1 10 456 1 +97 455 1 +97 455 1 +98 455 1 +98 455 1 100 457 1 100 457 1 103 457 1 @@ -393,10 +397,6 @@ POSTHOOK: Input: default@multi_partitioned@p=457 457 103 2 457 103 2 457 103 457 -97 455 1 -97 455 1 -98 455 1 -98 455 1 PREHOOK: query: desc formatted multi_partitioned PREHOOK: type: DESCTABLE PREHOOK: Input: default@multi_partitioned @@ -484,6 +484,12 @@ POSTHOOK: Input: default@multi_partitioned@p=457 10 456 1 10 456 1 10 456 1 +97 455 1 +97 455 1 +97 455 1 +98 455 1 +98 455 1 +98 455 1 100 457 1 100 457 1 100 457 1 @@ -514,12 +520,6 @@ POSTHOOK: Input: default@multi_partitioned@p=457 457 103 2 457 103 2 457 103 457 -97 455 1 -97 455 1 -97 455 1 -98 455 1 -98 455 1 -98 455 1 PREHOOK: query: desc formatted multi_partitioned PREHOOK: type: DESCTABLE PREHOOK: Input: default@multi_partitioned diff --git a/ql/src/test/results/clientpositive/llap/parquet_timestampt_to_bigint.q.out b/ql/src/test/results/clientpositive/llap/parquet_timestampt_to_bigint.q.out deleted file mode 100644 index 63af5b8cc9..0000000000 --- a/ql/src/test/results/clientpositive/llap/parquet_timestampt_to_bigint.q.out +++ /dev/null @@ -1,60 +0,0 @@ -PREHOOK: query: DROP TABLE ts_pq -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE ts_pq -POSTHOOK: type: 
DROPTABLE -PREHOOK: query: CREATE EXTERNAL TABLE ts_pq (ts1 TIMESTAMP) - STORED AS PARQUET -#### A masked pattern was here #### -PREHOOK: type: CREATETABLE -#### A masked pattern was here #### -PREHOOK: Output: database:default -PREHOOK: Output: default@ts_pq -POSTHOOK: query: CREATE EXTERNAL TABLE ts_pq (ts1 TIMESTAMP) - STORED AS PARQUET -#### A masked pattern was here #### -POSTHOOK: type: CREATETABLE -#### A masked pattern was here #### -POSTHOOK: Output: database:default -POSTHOOK: Output: default@ts_pq -PREHOOK: query: INSERT INTO ts_pq VALUES ('1998-10-03 09:58:31.231') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@ts_pq -POSTHOOK: query: INSERT INTO ts_pq VALUES ('1998-10-03 09:58:31.231') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@ts_pq -POSTHOOK: Lineage: ts_pq.ts1 SCRIPT [] -PREHOOK: query: SELECT * FROM ts_pq -PREHOOK: type: QUERY -PREHOOK: Input: default@ts_pq -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM ts_pq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ts_pq -#### A masked pattern was here #### -1998-10-03 09:58:31.231 -PREHOOK: query: CREATE EXTERNAL TABLE ts_pq_2 (ts2 BIGINT) - STORED AS PARQUET -#### A masked pattern was here #### -PREHOOK: type: CREATETABLE -#### A masked pattern was here #### -PREHOOK: Output: database:default -PREHOOK: Output: default@ts_pq_2 -POSTHOOK: query: CREATE EXTERNAL TABLE ts_pq_2 (ts2 BIGINT) - STORED AS PARQUET -#### A masked pattern was here #### -POSTHOOK: type: CREATETABLE -#### A masked pattern was here #### -POSTHOOK: Output: database:default -POSTHOOK: Output: default@ts_pq_2 -PREHOOK: query: SELECT * FROM ts_pq_2 -PREHOOK: type: QUERY -PREHOOK: Input: default@ts_pq_2 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM ts_pq_2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ts_pq_2 -#### A masked pattern was here #### -61111231000000 -#### A masked pattern was here 
#### diff --git a/ql/src/test/results/clientpositive/llap/partition_wise_fileformat15.q.out b/ql/src/test/results/clientpositive/llap/partition_wise_fileformat15.q.out index 40eb2bf3ad..87098d265b 100644 --- a/ql/src/test/results/clientpositive/llap/partition_wise_fileformat15.q.out +++ b/ql/src/test/results/clientpositive/llap/partition_wise_fileformat15.q.out @@ -133,9 +133,9 @@ POSTHOOK: Input: default@partition_test_partitioned_n6 POSTHOOK: Input: default@partition_test_partitioned_n6@dt=1 POSTHOOK: Input: default@partition_test_partitioned_n6@dt=2 #### A masked pattern was here #### -172 val_86 val_86 2 476 val_238 NULL 1 476 val_238 NULL 1 +172 val_86 val_86 2 PREHOOK: query: select * from partition_test_partitioned_n6 where dt is not null PREHOOK: type: QUERY PREHOOK: Input: default@partition_test_partitioned_n6 diff --git a/ql/src/test/results/clientpositive/llap/partition_wise_fileformat16.q.out b/ql/src/test/results/clientpositive/llap/partition_wise_fileformat16.q.out index 516d1f909b..233e22953a 100644 --- a/ql/src/test/results/clientpositive/llap/partition_wise_fileformat16.q.out +++ b/ql/src/test/results/clientpositive/llap/partition_wise_fileformat16.q.out @@ -133,9 +133,9 @@ POSTHOOK: Input: default@partition_test_partitioned_n10 POSTHOOK: Input: default@partition_test_partitioned_n10@dt=1 POSTHOOK: Input: default@partition_test_partitioned_n10@dt=2 #### A masked pattern was here #### -172 val_86 val_86 2 476 val_238 NULL 1 476 val_238 NULL 1 +172 val_86 val_86 2 PREHOOK: query: select * from partition_test_partitioned_n10 where dt is not null PREHOOK: type: QUERY PREHOOK: Input: default@partition_test_partitioned_n10 diff --git a/ql/src/test/results/clientpositive/llap/reset_conf.q.out b/ql/src/test/results/clientpositive/llap/reset_conf.q.out index 7d11095844..12f2555f27 100644 --- a/ql/src/test/results/clientpositive/llap/reset_conf.q.out +++ b/ql/src/test/results/clientpositive/llap/reset_conf.q.out @@ -2,10 +2,8 @@ hive.skewjoin.key=100000 
hive.skewjoin.mapjoin.min.split=33554432 hive.skewjoin.key=300000 hive.skewjoin.mapjoin.min.split=256000000 -hive.query.max.length=100Mb hive.skewjoin.key=100000 hive.skewjoin.mapjoin.min.split=33554432 -hive.query.max.length=100Mb PREHOOK: query: select 'After setting hive.skewjoin.key and hive.skewjoin.mapjoin.min.split' PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -38,4 +36,3 @@ POSTHOOK: Input: _dummy_database@_dummy_table After resetting both to default hive.skewjoin.key=100000 hive.skewjoin.mapjoin.min.split=33554432 -hive.query.max.length=100Mb diff --git a/ql/src/test/results/clientpositive/llap/show_materialized_views.q.out b/ql/src/test/results/clientpositive/llap/show_materialized_views.q.out index d377f97f14..57bd93b545 100644 --- a/ql/src/test/results/clientpositive/llap/show_materialized_views.q.out +++ b/ql/src/test/results/clientpositive/llap/show_materialized_views.q.out @@ -149,6 +149,7 @@ POSTHOOK: type: SHOWMATERIALIZEDVIEWS shtb_full_view2 Yes Manual refresh (Valid for 5min) shtb_test1_view1 No Manual refresh shtb_test1_view2 Yes Manual refresh (Valid always) + PREHOOK: query: EXPLAIN SHOW MATERIALIZED VIEWS '*test*' PREHOOK: type: SHOWMATERIALIZEDVIEWS POSTHOOK: query: EXPLAIN SHOW MATERIALIZED VIEWS '*test*' @@ -176,6 +177,7 @@ POSTHOOK: type: SHOWMATERIALIZEDVIEWS # MV Name Rewriting Enabled Mode shtb_test1_view1 No Manual refresh shtb_test1_view2 Yes Manual refresh (Valid always) + PREHOOK: query: SHOW MATERIALIZED VIEWS '*view2' PREHOOK: type: SHOWMATERIALIZEDVIEWS POSTHOOK: query: SHOW MATERIALIZED VIEWS '*view2' @@ -183,6 +185,7 @@ POSTHOOK: type: SHOWMATERIALIZEDVIEWS # MV Name Rewriting Enabled Mode shtb_full_view2 Yes Manual refresh (Valid for 5min) shtb_test1_view2 Yes Manual refresh (Valid always) + PREHOOK: query: EXPLAIN SHOW MATERIALIZED VIEWS LIKE 'shtb_test1_view1|shtb_test1_view2' PREHOOK: type: SHOWMATERIALIZEDVIEWS POSTHOOK: query: EXPLAIN SHOW MATERIALIZED VIEWS LIKE 
'shtb_test1_view1|shtb_test1_view2' @@ -210,6 +213,7 @@ POSTHOOK: type: SHOWMATERIALIZEDVIEWS # MV Name Rewriting Enabled Mode shtb_test1_view1 No Manual refresh shtb_test1_view2 Yes Manual refresh (Valid always) + PREHOOK: query: USE test2 PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:test2 @@ -223,6 +227,7 @@ POSTHOOK: type: SHOWMATERIALIZEDVIEWS # MV Name Rewriting Enabled Mode shtb_test1_view1 No Manual refresh shtb_test2_view2 No Manual refresh + PREHOOK: query: USE default PREHOOK: type: SWITCHDATABASE PREHOOK: Input: database:default @@ -256,6 +261,7 @@ POSTHOOK: type: SHOWMATERIALIZEDVIEWS shtb_full_view2 Yes Manual refresh (Valid for 5min) shtb_test1_view1 No Manual refresh shtb_test1_view2 Yes Manual refresh (Valid always) + PREHOOK: query: SHOW MATERIALIZED VIEWS FROM test2 PREHOOK: type: SHOWMATERIALIZEDVIEWS POSTHOOK: query: SHOW MATERIALIZED VIEWS FROM test2 @@ -263,6 +269,7 @@ POSTHOOK: type: SHOWMATERIALIZEDVIEWS # MV Name Rewriting Enabled Mode shtb_test1_view1 No Manual refresh shtb_test2_view2 No Manual refresh + PREHOOK: query: EXPLAIN SHOW MATERIALIZED VIEWS IN test1 PREHOOK: type: SHOWMATERIALIZEDVIEWS POSTHOOK: query: EXPLAIN SHOW MATERIALIZED VIEWS IN test1 @@ -290,6 +297,7 @@ POSTHOOK: type: SHOWMATERIALIZEDVIEWS shtb_full_view2 Yes Manual refresh (Valid for 5min) shtb_test1_view1 No Manual refresh shtb_test1_view2 Yes Manual refresh (Valid always) + PREHOOK: query: SHOW MATERIALIZED VIEWS IN default PREHOOK: type: SHOWMATERIALIZEDVIEWS POSTHOOK: query: SHOW MATERIALIZED VIEWS IN default @@ -321,6 +329,7 @@ POSTHOOK: type: SHOWMATERIALIZEDVIEWS # MV Name Rewriting Enabled Mode shtb_test1_view1 No Manual refresh shtb_test1_view2 Yes Manual refresh (Valid always) + PREHOOK: query: DESCRIBE FORMATTED test1.shtb_full_view2 PREHOOK: type: DESCTABLE PREHOOK: Input: test1@shtb_full_view2 @@ -495,6 +504,7 @@ POSTHOOK: query: SHOW MATERIALIZED VIEWS FROM `database` LIKE "fooview" POSTHOOK: type: SHOWMATERIALIZEDVIEWS # MV Name Rewriting 
Enabled Mode fooview Yes Manual refresh (Valid until source tables modified) + PREHOOK: query: DESCRIBE FORMATTED `database`.`fooview` PREHOOK: type: DESCTABLE PREHOOK: Input: database@fooview diff --git a/ql/src/test/results/clientpositive/llap/show_tables.q.out b/ql/src/test/results/clientpositive/llap/show_tables.q.out index ade1690e89..342ebd4016 100644 --- a/ql/src/test/results/clientpositive/llap/show_tables.q.out +++ b/ql/src/test/results/clientpositive/llap/show_tables.q.out @@ -195,6 +195,7 @@ bar_n0 MANAGED_TABLE baz MANAGED_TABLE foo_n4 MANAGED_TABLE test_view_n100 VIRTUAL_VIEW + PREHOOK: query: EXPLAIN SHOW TABLES IN test_db PREHOOK: type: SHOWTABLES PREHOOK: Input: database:test_db @@ -259,6 +260,7 @@ bar_n0 MANAGED_TABLE baz MANAGED_TABLE foo_n4 MANAGED_TABLE test_view_n100 VIRTUAL_VIEW + PREHOOK: query: EXPLAIN SHOW TABLES IN test_db "test*" PREHOOK: type: SHOWTABLES PREHOOK: Input: database:test_db @@ -378,6 +380,7 @@ POSTHOOK: type: SHOWTABLES POSTHOOK: Input: database:test_db # Table Name Table Type test_view_n100 VIRTUAL_VIEW + PREHOOK: query: SHOW TABLE EXTENDED IN test_db LIKE foo_n4 PREHOOK: type: SHOW_TABLESTATUS POSTHOOK: query: SHOW TABLE EXTENDED IN test_db LIKE foo_n4 diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup2.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup2.q.out deleted file mode 100644 index e7b3c0ea27..0000000000 --- a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup2.q.out +++ /dev/null @@ -1,634 +0,0 @@ -PREHOOK: query: create table sketch_input (id int, category char(1)) -STORED AS ORC -TBLPROPERTIES ('transactional'='true') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@sketch_input -POSTHOOK: query: create table sketch_input (id int, category char(1)) -STORED AS ORC -TBLPROPERTIES ('transactional'='true') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: 
Output: default@sketch_input -PREHOOK: query: insert into table sketch_input values - (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), - (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@sketch_input -POSTHOOK: query: insert into table sketch_input values - (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), - (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@sketch_input -POSTHOOK: Lineage: sketch_input.category SCRIPT [] -POSTHOOK: Lineage: sketch_input.id SCRIPT [] -PREHOOK: query: create materialized view mv_1 as - select category, ds_hll_sketch(id),count(id) from sketch_input group by category -PREHOOK: type: CREATE_MATERIALIZED_VIEW -PREHOOK: Input: default@sketch_input -PREHOOK: Output: database:default -PREHOOK: Output: default@mv_1 -POSTHOOK: query: create materialized view mv_1 as - select category, ds_hll_sketch(id),count(id) from sketch_input group by category -POSTHOOK: type: CREATE_MATERIALIZED_VIEW -POSTHOOK: Input: default@sketch_input -POSTHOOK: Output: database:default -POSTHOOK: Output: default@mv_1 -PREHOOK: query: explain -select 'rewrite; mv matching', category, count(distinct id) from sketch_input group by category -PREHOOK: type: QUERY -PREHOOK: Input: default@mv_1 -PREHOOK: Input: default@sketch_input -#### A masked pattern was here #### -POSTHOOK: query: explain -select 'rewrite; mv matching', category, count(distinct id) from sketch_input group by category -POSTHOOK: type: QUERY -POSTHOOK: Input: default@mv_1 -POSTHOOK: Input: default@sketch_input -#### A masked pattern was here #### -STAGE DEPENDENCIES: - 
Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: default.mv_1 - Statistics: Num rows: 2 Data size: 362 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'rewrite; mv matching' (type: string), category (type: char(1)), UDFToLong(ds_hll_estimate(_c1)) (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 394 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 394 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select 'rewrite; mv matching', category, count(distinct id) from sketch_input group by category -PREHOOK: type: QUERY -PREHOOK: Input: default@mv_1 -PREHOOK: Input: default@sketch_input -#### A masked pattern was here #### -POSTHOOK: query: select 'rewrite; mv matching', category, count(distinct id) from sketch_input group by category -POSTHOOK: type: QUERY -POSTHOOK: Input: default@mv_1 -POSTHOOK: Input: default@sketch_input -#### A masked pattern was here #### -rewrite; mv matching a 10 -rewrite; mv matching b 10 -PREHOOK: query: explain -select 'no rewrite; no mv usage', category, count(distinct id) from sketch_input group by category -PREHOOK: type: QUERY -PREHOOK: Input: default@sketch_input -#### A masked pattern was here #### -POSTHOOK: query: explain -select 'no rewrite; no mv usage', category, count(distinct id) from sketch_input group by category -POSTHOOK: type: QUERY -POSTHOOK: Input: default@sketch_input -#### A 
masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: sketch_input - Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: id (type: int), category (type: char(1)) - outputColumnNames: id, category - Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: category (type: char(1)), id (type: int) - minReductionHashAggr: 0.3181818 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: char(1)), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: char(1)) - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: char(1)), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: char(1)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col0) - keys: _col1 (type: char(1)) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'no rewrite; no mv usage' (type: string), _col0 (type: 
char(1)), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select 'no rewrite; no mv usage', category, count(distinct id) from sketch_input group by category -PREHOOK: type: QUERY -PREHOOK: Input: default@sketch_input -#### A masked pattern was here #### -POSTHOOK: query: select 'no rewrite; no mv usage', category, count(distinct id) from sketch_input group by category -POSTHOOK: type: QUERY -POSTHOOK: Input: default@sketch_input -#### A masked pattern was here #### -no rewrite; no mv usage a 10 -no rewrite; no mv usage b 10 -PREHOOK: query: insert into table sketch_input values - (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), - (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@sketch_input -POSTHOOK: query: insert into table sketch_input values - (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), - (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@sketch_input -POSTHOOK: Lineage: sketch_input.category SCRIPT [] -POSTHOOK: Lineage: sketch_input.id SCRIPT [] -PREHOOK: query: explain -select 'rewrite; but no mv 
usage', category, count(distinct id) from sketch_input group by category -PREHOOK: type: QUERY -PREHOOK: Input: default@sketch_input -#### A masked pattern was here #### -POSTHOOK: query: explain -select 'rewrite; but no mv usage', category, count(distinct id) from sketch_input group by category -POSTHOOK: type: QUERY -POSTHOOK: Input: default@sketch_input -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: sketch_input - Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: id (type: int), category (type: char(1)) - outputColumnNames: id, category - Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: ds_hll_sketch(id) - keys: category (type: char(1)) - minReductionHashAggr: 0.95454544 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: char(1)) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: char(1)) - Statistics: Num rows: 2 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) - Execution mode: llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: ds_hll_sketch(VALUE._col0) - keys: KEY._col0 (type: char(1)) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'rewrite; but no mv usage' (type: string), _col0 (type: char(1)), 
UDFToLong(ds_hll_estimate(_col1)) (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select 'rewrite; but no mv usage', category, count(distinct id) from sketch_input group by category -PREHOOK: type: QUERY -PREHOOK: Input: default@sketch_input -#### A masked pattern was here #### -POSTHOOK: query: select 'rewrite; but no mv usage', category, count(distinct id) from sketch_input group by category -POSTHOOK: type: QUERY -POSTHOOK: Input: default@sketch_input -#### A masked pattern was here #### -rewrite; but no mv usage a 10 -rewrite; but no mv usage b 10 -PREHOOK: query: explain -alter materialized view mv_1 rebuild -PREHOOK: type: QUERY -PREHOOK: Input: default@mv_1 -PREHOOK: Input: default@sketch_input -PREHOOK: Output: default@mv_1 -POSTHOOK: query: explain -alter materialized view mv_1 rebuild -POSTHOOK: type: QUERY -POSTHOOK: Input: default@mv_1 -POSTHOOK: Input: default@sketch_input -POSTHOOK: Output: default@mv_1 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-4 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 6 <- Union 3 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - 
alias: sketch_input - filterExpr: (ROW__ID.writeid > 1L) (type: boolean) - Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ROW__ID.writeid > 1L) (type: boolean) - Statistics: Num rows: 14 Data size: 1246 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: id (type: int), category (type: char(1)) - outputColumnNames: id, category - Statistics: Num rows: 14 Data size: 1246 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: ds_hll_sketch(id), count(id) - keys: category (type: char(1)) - minReductionHashAggr: 0.85714287 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 962 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: char(1)) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: char(1)) - Statistics: Num rows: 2 Data size: 962 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: bigint) - Execution mode: llap - LLAP IO: may be used (ACID table) - Map 6 - Map Operator Tree: - TableScan - alias: default.mv_1 - Statistics: Num rows: 2 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: category (type: char(1)), _c1 (type: binary), _c2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 378 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: ds_hll_union(_col1), sum(_col2) - keys: _col0 (type: char(1)) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 962 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: char(1)) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: char(1)) - Statistics: Num rows: 2 Data size: 962 Basic 
stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: bigint) - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: ds_hll_sketch(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: char(1)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: ds_hll_union(_col1), sum(_col2) - keys: _col0 (type: char(1)) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 962 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: char(1)) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: char(1)) - Statistics: Num rows: 2 Data size: 962 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: ds_hll_union(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: char(1)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: char(1)), _col1 (type: binary), COALESCE(_col2,0L) (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.mv_1 - Select Operator - expressions: _col0 (type: 
char(1)), _col1 (type: binary), _col2 (type: bigint) - outputColumnNames: category, _c1, _c2 - Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(category, 'hll'), compute_stats(_c1, 'hll'), compute_stats(_c2, 'hll') - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.mv_1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: category, _c1, _c2 - Column Types: char(1), binary, bigint - Table: default.mv_1 - - Stage: Stage-4 - Materialized View Update - name: default.mv_1 - update creation metadata: true - -PREHOOK: query: alter 
materialized view mv_1 rebuild -PREHOOK: type: QUERY -PREHOOK: Input: default@mv_1 -PREHOOK: Input: default@sketch_input -PREHOOK: Output: default@mv_1 -POSTHOOK: query: alter materialized view mv_1 rebuild -POSTHOOK: type: QUERY -POSTHOOK: Input: default@mv_1 -POSTHOOK: Input: default@sketch_input -POSTHOOK: Output: default@mv_1 -POSTHOOK: Lineage: mv_1._c1 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id, type:int, comment:null), (mv_1)default.mv_1.FieldSchema(name:_c1, type:binary, comment:null), ] -POSTHOOK: Lineage: mv_1._c2 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id, type:int, comment:null), (mv_1)default.mv_1.FieldSchema(name:_c2, type:bigint, comment:null), ] -POSTHOOK: Lineage: mv_1.category EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:category, type:char(1), comment:null), (mv_1)default.mv_1.FieldSchema(name:category, type:char(1), comment:null), ] -PREHOOK: query: explain -select 'rewrite; mv matching', category, count(distinct id) from sketch_input group by category -PREHOOK: type: QUERY -PREHOOK: Input: default@mv_1 -PREHOOK: Input: default@sketch_input -#### A masked pattern was here #### -POSTHOOK: query: explain -select 'rewrite; mv matching', category, count(distinct id) from sketch_input group by category -POSTHOOK: type: QUERY -POSTHOOK: Input: default@mv_1 -POSTHOOK: Input: default@sketch_input -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: default.mv_1 - Statistics: Num rows: 2 Data size: 362 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'rewrite; mv matching' (type: string), category (type: char(1)), UDFToLong(ds_hll_estimate(_c1)) (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 394 Basic stats: COMPLETE Column stats: 
COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 394 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - LLAP IO: all inputs - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select 'rewrite; mv matching', category, count(distinct id) from sketch_input group by category -PREHOOK: type: QUERY -PREHOOK: Input: default@mv_1 -PREHOOK: Input: default@sketch_input -#### A masked pattern was here #### -POSTHOOK: query: select 'rewrite; mv matching', category, count(distinct id) from sketch_input group by category -POSTHOOK: type: QUERY -POSTHOOK: Input: default@mv_1 -POSTHOOK: Input: default@sketch_input -#### A masked pattern was here #### -rewrite; mv matching a 10 -rewrite; mv matching b 10 -PREHOOK: query: explain -select 'rewrite;mv matching with rollup',count(distinct id) from sketch_input -PREHOOK: type: QUERY -PREHOOK: Input: default@mv_1 -PREHOOK: Input: default@sketch_input -#### A masked pattern was here #### -POSTHOOK: query: explain -select 'rewrite;mv matching with rollup',count(distinct id) from sketch_input -POSTHOOK: type: QUERY -POSTHOOK: Input: default@mv_1 -POSTHOOK: Input: default@sketch_input -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: default.mv_1 - Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _c1 (type: binary) - outputColumnNames: _c1 - Statistics: Num rows: 2 
Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: ds_hll_union(_c1) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: ds_hll_union(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'rewrite;mv matching with rollup' (type: string), UDFToLong(ds_hll_estimate(_col0)) (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select 'rewrite;mv matching with rollup',count(distinct id) from sketch_input -PREHOOK: type: QUERY -PREHOOK: Input: default@mv_1 -PREHOOK: Input: default@sketch_input -#### A masked pattern was here #### -POSTHOOK: query: select 'rewrite;mv matching with rollup',count(distinct id) from sketch_input -POSTHOOK: type: QUERY -POSTHOOK: Input: default@mv_1 -POSTHOOK: Input: default@sketch_input -#### A masked pattern was here #### -rewrite;mv matching with rollup 15 -PREHOOK: query: drop materialized view mv_1 -PREHOOK: type: DROP_MATERIALIZED_VIEW 
-PREHOOK: Input: default@mv_1 -PREHOOK: Output: default@mv_1 -POSTHOOK: query: drop materialized view mv_1 -POSTHOOK: type: DROP_MATERIALIZED_VIEW -POSTHOOK: Input: default@mv_1 -POSTHOOK: Output: default@mv_1 diff --git a/ql/src/test/results/clientpositive/llap/sketches_rewrite.q.out b/ql/src/test/results/clientpositive/llap/sketches_rewrite.q.out deleted file mode 100644 index dedcff9452..0000000000 --- a/ql/src/test/results/clientpositive/llap/sketches_rewrite.q.out +++ /dev/null @@ -1,110 +0,0 @@ -PREHOOK: query: create table sketch_input (id int, category char(1)) -STORED AS ORC -TBLPROPERTIES ('transactional'='true') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@sketch_input -POSTHOOK: query: create table sketch_input (id int, category char(1)) -STORED AS ORC -TBLPROPERTIES ('transactional'='true') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@sketch_input -PREHOOK: query: insert into table sketch_input values - (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), - (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@sketch_input -POSTHOOK: query: insert into table sketch_input values - (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), - (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@sketch_input -POSTHOOK: Lineage: sketch_input.category SCRIPT [] -POSTHOOK: Lineage: sketch_input.id SCRIPT [] -PREHOOK: query: explain -select category, count(distinct id) from sketch_input group by category -PREHOOK: type: QUERY -PREHOOK: Input: default@sketch_input 
-#### A masked pattern was here #### -POSTHOOK: query: explain -select category, count(distinct id) from sketch_input group by category -POSTHOOK: type: QUERY -POSTHOOK: Input: default@sketch_input -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: sketch_input - Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: id (type: int), category (type: char(1)) - outputColumnNames: id, category - Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: ds_hll_sketch(id) - keys: category (type: char(1)) - minReductionHashAggr: 0.9090909 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: char(1)) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: char(1)) - Statistics: Num rows: 2 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) - Execution mode: llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: ds_hll_sketch(VALUE._col0) - keys: KEY._col0 (type: char(1)) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: char(1)), UDFToLong(ds_hll_estimate(_col1)) (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - 
Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select category, count(distinct id) from sketch_input group by category -PREHOOK: type: QUERY -PREHOOK: Input: default@sketch_input -#### A masked pattern was here #### -POSTHOOK: query: select category, count(distinct id) from sketch_input group by category -POSTHOOK: type: QUERY -POSTHOOK: Input: default@sketch_input -#### A masked pattern was here #### -a 10 -b 10 diff --git a/ql/src/test/results/clientpositive/llap/udf_substr.q.out b/ql/src/test/results/clientpositive/llap/udf_substr.q.out index 7c1a0f18e0..00fa6061e1 100644 --- a/ql/src/test/results/clientpositive/llap/udf_substr.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_substr.q.out @@ -8,7 +8,7 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED substr POSTHOOK: type: DESCFUNCTION substr(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstr(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len -Synonyms: substring, mid +Synonyms: mid, substring pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str. 
Example: > SELECT substr('Facebook', 5) FROM src LIMIT 1; diff --git a/ql/src/test/results/clientpositive/llap/udf_substring.q.out b/ql/src/test/results/clientpositive/llap/udf_substring.q.out index 7e96b79c0c..d6b1c4aa51 100644 --- a/ql/src/test/results/clientpositive/llap/udf_substring.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_substring.q.out @@ -8,7 +8,7 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED substring POSTHOOK: type: DESCFUNCTION substring(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstring(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len -Synonyms: substr, mid +Synonyms: mid, substr pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str. Example: > SELECT substring('Facebook', 5) FROM src LIMIT 1; diff --git a/ql/src/test/results/clientpositive/llap/vector_offset_limit_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_offset_limit_reduce.q.out deleted file mode 100644 index ed793f0139..0000000000 --- a/ql/src/test/results/clientpositive/llap/vector_offset_limit_reduce.q.out +++ /dev/null @@ -1,51 +0,0 @@ -PREHOOK: query: CREATE EXTERNAL TABLE orderdatatest_ext (col1 int, col2 int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@orderdatatest_ext -POSTHOOK: query: CREATE EXTERNAL TABLE orderdatatest_ext (col1 int, col2 int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@orderdatatest_ext -PREHOOK: query: INSERT INTO orderdatatest_ext VALUES - (1,1), - (1,2), - (1,3), - (1,4), - (1,5), - (1,6), - (1,7), - (1,8), - (1,9), - (1,10) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@orderdatatest_ext -POSTHOOK: query: INSERT INTO orderdatatest_ext VALUES - (1,1), - (1,2), - (1,3), - (1,4), - (1,5), - (1,6), - (1,7), - (1,8), - (1,9), 
- (1,10) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@orderdatatest_ext -POSTHOOK: Lineage: orderdatatest_ext.col1 SCRIPT [] -POSTHOOK: Lineage: orderdatatest_ext.col2 SCRIPT [] -PREHOOK: query: select * from orderdatatest_ext order by col1 limit 5,100 -PREHOOK: type: QUERY -PREHOOK: Input: default@orderdatatest_ext -#### A masked pattern was here #### -POSTHOOK: query: select * from orderdatatest_ext order by col1 limit 5,100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orderdatatest_ext -#### A masked pattern was here #### -1 6 -1 7 -1 8 -1 9 -1 10 diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LockRequest.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LockRequest.java index 06722f1ab2..ad6cc0482d 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LockRequest.java +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LockRequest.java @@ -43,7 +43,6 @@ private static final org.apache.thrift.protocol.TField USER_FIELD_DESC = new org.apache.thrift.protocol.TField("user", org.apache.thrift.protocol.TType.STRING, (short)3); private static final org.apache.thrift.protocol.TField HOSTNAME_FIELD_DESC = new org.apache.thrift.protocol.TField("hostname", org.apache.thrift.protocol.TType.STRING, (short)4); private static final org.apache.thrift.protocol.TField AGENT_INFO_FIELD_DESC = new org.apache.thrift.protocol.TField("agentInfo", org.apache.thrift.protocol.TType.STRING, (short)5); - private static final org.apache.thrift.protocol.TField ZERO_WAIT_READ_ENABLED_FIELD_DESC = new org.apache.thrift.protocol.TField("zeroWaitReadEnabled", org.apache.thrift.protocol.TType.BOOL, (short)6); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static 
{ @@ -56,7 +55,6 @@ private String user; // required private String hostname; // required private String agentInfo; // optional - private boolean zeroWaitReadEnabled; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -64,8 +62,7 @@ TXNID((short)2, "txnid"), USER((short)3, "user"), HOSTNAME((short)4, "hostname"), - AGENT_INFO((short)5, "agentInfo"), - ZERO_WAIT_READ_ENABLED((short)6, "zeroWaitReadEnabled"); + AGENT_INFO((short)5, "agentInfo"); private static final Map byName = new HashMap(); @@ -90,8 +87,6 @@ public static _Fields findByThriftId(int fieldId) { return HOSTNAME; case 5: // AGENT_INFO return AGENT_INFO; - case 6: // ZERO_WAIT_READ_ENABLED - return ZERO_WAIT_READ_ENABLED; default: return null; } @@ -133,9 +128,8 @@ public String getFieldName() { // isset id assignments private static final int __TXNID_ISSET_ID = 0; - private static final int __ZEROWAITREADENABLED_ISSET_ID = 1; private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.TXNID,_Fields.AGENT_INFO,_Fields.ZERO_WAIT_READ_ENABLED}; + private static final _Fields optionals[] = {_Fields.TXNID,_Fields.AGENT_INFO}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -150,8 +144,6 @@ public String getFieldName() { new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); tmpMap.put(_Fields.AGENT_INFO, new org.apache.thrift.meta_data.FieldMetaData("agentInfo", org.apache.thrift.TFieldRequirementType.OPTIONAL, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.ZERO_WAIT_READ_ENABLED, new 
org.apache.thrift.meta_data.FieldMetaData("zeroWaitReadEnabled", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(LockRequest.class, metaDataMap); } @@ -159,8 +151,6 @@ public String getFieldName() { public LockRequest() { this.agentInfo = "Unknown"; - this.zeroWaitReadEnabled = false; - } public LockRequest( @@ -196,7 +186,6 @@ public LockRequest(LockRequest other) { if (other.isSetAgentInfo()) { this.agentInfo = other.agentInfo; } - this.zeroWaitReadEnabled = other.zeroWaitReadEnabled; } public LockRequest deepCopy() { @@ -212,8 +201,6 @@ public void clear() { this.hostname = null; this.agentInfo = "Unknown"; - this.zeroWaitReadEnabled = false; - } public int getComponentSize() { @@ -345,28 +332,6 @@ public void setAgentInfoIsSet(boolean value) { } } - public boolean isZeroWaitReadEnabled() { - return this.zeroWaitReadEnabled; - } - - public void setZeroWaitReadEnabled(boolean zeroWaitReadEnabled) { - this.zeroWaitReadEnabled = zeroWaitReadEnabled; - setZeroWaitReadEnabledIsSet(true); - } - - public void unsetZeroWaitReadEnabled() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __ZEROWAITREADENABLED_ISSET_ID); - } - - /** Returns true if field zeroWaitReadEnabled is set (has been assigned a value) and false otherwise */ - public boolean isSetZeroWaitReadEnabled() { - return EncodingUtils.testBit(__isset_bitfield, __ZEROWAITREADENABLED_ISSET_ID); - } - - public void setZeroWaitReadEnabledIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __ZEROWAITREADENABLED_ISSET_ID, value); - } - public void setFieldValue(_Fields field, Object value) { switch (field) { case COMPONENT: @@ -409,14 +374,6 @@ public void setFieldValue(_Fields field, Object value) { } break; - case ZERO_WAIT_READ_ENABLED: - if (value == null) { - 
unsetZeroWaitReadEnabled(); - } else { - setZeroWaitReadEnabled((Boolean)value); - } - break; - } } @@ -437,9 +394,6 @@ public Object getFieldValue(_Fields field) { case AGENT_INFO: return getAgentInfo(); - case ZERO_WAIT_READ_ENABLED: - return isZeroWaitReadEnabled(); - } throw new IllegalStateException(); } @@ -461,8 +415,6 @@ public boolean isSet(_Fields field) { return isSetHostname(); case AGENT_INFO: return isSetAgentInfo(); - case ZERO_WAIT_READ_ENABLED: - return isSetZeroWaitReadEnabled(); } throw new IllegalStateException(); } @@ -525,15 +477,6 @@ public boolean equals(LockRequest that) { return false; } - boolean this_present_zeroWaitReadEnabled = true && this.isSetZeroWaitReadEnabled(); - boolean that_present_zeroWaitReadEnabled = true && that.isSetZeroWaitReadEnabled(); - if (this_present_zeroWaitReadEnabled || that_present_zeroWaitReadEnabled) { - if (!(this_present_zeroWaitReadEnabled && that_present_zeroWaitReadEnabled)) - return false; - if (this.zeroWaitReadEnabled != that.zeroWaitReadEnabled) - return false; - } - return true; } @@ -566,11 +509,6 @@ public int hashCode() { if (present_agentInfo) list.add(agentInfo); - boolean present_zeroWaitReadEnabled = true && (isSetZeroWaitReadEnabled()); - list.add(present_zeroWaitReadEnabled); - if (present_zeroWaitReadEnabled) - list.add(zeroWaitReadEnabled); - return list.hashCode(); } @@ -632,16 +570,6 @@ public int compareTo(LockRequest other) { return lastComparison; } } - lastComparison = Boolean.valueOf(isSetZeroWaitReadEnabled()).compareTo(other.isSetZeroWaitReadEnabled()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetZeroWaitReadEnabled()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.zeroWaitReadEnabled, other.zeroWaitReadEnabled); - if (lastComparison != 0) { - return lastComparison; - } - } return 0; } @@ -701,12 +629,6 @@ public String toString() { } first = false; } - if (isSetZeroWaitReadEnabled()) { - if (!first) sb.append(", "); - 
sb.append("zeroWaitReadEnabled:"); - sb.append(this.zeroWaitReadEnabled); - first = false; - } sb.append(")"); return sb.toString(); } @@ -815,14 +737,6 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, LockRequest struct) org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; - case 6: // ZERO_WAIT_READ_ENABLED - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.zeroWaitReadEnabled = iprot.readBool(); - struct.setZeroWaitReadEnabledIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -870,11 +784,6 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, LockRequest struct oprot.writeFieldEnd(); } } - if (struct.isSetZeroWaitReadEnabled()) { - oprot.writeFieldBegin(ZERO_WAIT_READ_ENABLED_FIELD_DESC); - oprot.writeBool(struct.zeroWaitReadEnabled); - oprot.writeFieldEnd(); - } oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -908,19 +817,13 @@ public void write(org.apache.thrift.protocol.TProtocol prot, LockRequest struct) if (struct.isSetAgentInfo()) { optionals.set(1); } - if (struct.isSetZeroWaitReadEnabled()) { - optionals.set(2); - } - oprot.writeBitSet(optionals, 3); + oprot.writeBitSet(optionals, 2); if (struct.isSetTxnid()) { oprot.writeI64(struct.txnid); } if (struct.isSetAgentInfo()) { oprot.writeString(struct.agentInfo); } - if (struct.isSetZeroWaitReadEnabled()) { - oprot.writeBool(struct.zeroWaitReadEnabled); - } } @Override @@ -942,7 +845,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, LockRequest struct) struct.setUserIsSet(true); struct.hostname = iprot.readString(); struct.setHostnameIsSet(true); - BitSet incoming = iprot.readBitSet(3); + BitSet incoming = iprot.readBitSet(2); if (incoming.get(0)) { struct.txnid = iprot.readI64(); struct.setTxnidIsSet(true); @@ -951,10 +854,6 @@ public void 
read(org.apache.thrift.protocol.TProtocol prot, LockRequest struct) struct.agentInfo = iprot.readString(); struct.setAgentInfoIsSet(true); } - if (incoming.get(2)) { - struct.zeroWaitReadEnabled = iprot.readBool(); - struct.setZeroWaitReadEnabledIsSet(true); - } } } diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LockResponse.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LockResponse.java index e0f88f897c..fdaab4b394 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LockResponse.java +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LockResponse.java @@ -40,7 +40,6 @@ private static final org.apache.thrift.protocol.TField LOCKID_FIELD_DESC = new org.apache.thrift.protocol.TField("lockid", org.apache.thrift.protocol.TType.I64, (short)1); private static final org.apache.thrift.protocol.TField STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("state", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField ERROR_MESSAGE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorMessage", org.apache.thrift.protocol.TType.STRING, (short)3); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -50,7 +49,6 @@ private long lockid; // required private LockState state; // required - private String errorMessage; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -59,8 +57,7 @@ * * @see LockState */ - STATE((short)2, "state"), - ERROR_MESSAGE((short)3, "errorMessage"); + STATE((short)2, "state"); private static final Map byName = new HashMap(); @@ -79,8 +76,6 @@ public static _Fields findByThriftId(int fieldId) { return LOCKID; case 2: // STATE return STATE; - case 3: // ERROR_MESSAGE - return ERROR_MESSAGE; default: return null; } @@ -123,7 +118,6 @@ public String getFieldName() { // isset id assignments private static final int __LOCKID_ISSET_ID = 0; private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.ERROR_MESSAGE}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -131,8 +125,6 @@ public String getFieldName() { new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.STATE, new org.apache.thrift.meta_data.FieldMetaData("state", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, LockState.class))); - tmpMap.put(_Fields.ERROR_MESSAGE, new org.apache.thrift.meta_data.FieldMetaData("errorMessage", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(LockResponse.class, metaDataMap); } @@ -159,9 +151,6 @@ public LockResponse(LockResponse other) { if (other.isSetState()) { this.state = other.state; } - if (other.isSetErrorMessage()) { - this.errorMessage = other.errorMessage; - } } public LockResponse deepCopy() { @@ -173,7 +162,6 @@ public void clear() { setLockidIsSet(false); this.lockid 
= 0; this.state = null; - this.errorMessage = null; } public long getLockid() { @@ -229,29 +217,6 @@ public void setStateIsSet(boolean value) { } } - public String getErrorMessage() { - return this.errorMessage; - } - - public void setErrorMessage(String errorMessage) { - this.errorMessage = errorMessage; - } - - public void unsetErrorMessage() { - this.errorMessage = null; - } - - /** Returns true if field errorMessage is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorMessage() { - return this.errorMessage != null; - } - - public void setErrorMessageIsSet(boolean value) { - if (!value) { - this.errorMessage = null; - } - } - public void setFieldValue(_Fields field, Object value) { switch (field) { case LOCKID: @@ -270,14 +235,6 @@ public void setFieldValue(_Fields field, Object value) { } break; - case ERROR_MESSAGE: - if (value == null) { - unsetErrorMessage(); - } else { - setErrorMessage((String)value); - } - break; - } } @@ -289,9 +246,6 @@ public Object getFieldValue(_Fields field) { case STATE: return getState(); - case ERROR_MESSAGE: - return getErrorMessage(); - } throw new IllegalStateException(); } @@ -307,8 +261,6 @@ public boolean isSet(_Fields field) { return isSetLockid(); case STATE: return isSetState(); - case ERROR_MESSAGE: - return isSetErrorMessage(); } throw new IllegalStateException(); } @@ -344,15 +296,6 @@ public boolean equals(LockResponse that) { return false; } - boolean this_present_errorMessage = true && this.isSetErrorMessage(); - boolean that_present_errorMessage = true && that.isSetErrorMessage(); - if (this_present_errorMessage || that_present_errorMessage) { - if (!(this_present_errorMessage && that_present_errorMessage)) - return false; - if (!this.errorMessage.equals(that.errorMessage)) - return false; - } - return true; } @@ -370,11 +313,6 @@ public int hashCode() { if (present_state) list.add(state.getValue()); - boolean present_errorMessage = true && (isSetErrorMessage()); - 
list.add(present_errorMessage); - if (present_errorMessage) - list.add(errorMessage); - return list.hashCode(); } @@ -406,16 +344,6 @@ public int compareTo(LockResponse other) { return lastComparison; } } - lastComparison = Boolean.valueOf(isSetErrorMessage()).compareTo(other.isSetErrorMessage()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorMessage()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorMessage, other.errorMessage); - if (lastComparison != 0) { - return lastComparison; - } - } return 0; } @@ -447,16 +375,6 @@ public String toString() { sb.append(this.state); } first = false; - if (isSetErrorMessage()) { - if (!first) sb.append(", "); - sb.append("errorMessage:"); - if (this.errorMessage == null) { - sb.append("null"); - } else { - sb.append(this.errorMessage); - } - first = false; - } sb.append(")"); return sb.toString(); } @@ -526,14 +444,6 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, LockResponse struct org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; - case 3: // ERROR_MESSAGE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -555,13 +465,6 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, LockResponse struc oprot.writeI32(struct.state.getValue()); oprot.writeFieldEnd(); } - if (struct.errorMessage != null) { - if (struct.isSetErrorMessage()) { - oprot.writeFieldBegin(ERROR_MESSAGE_FIELD_DESC); - oprot.writeString(struct.errorMessage); - oprot.writeFieldEnd(); - } - } oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -581,14 +484,6 @@ public void write(org.apache.thrift.protocol.TProtocol prot, LockResponse struct TTupleProtocol oprot = 
(TTupleProtocol) prot; oprot.writeI64(struct.lockid); oprot.writeI32(struct.state.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetErrorMessage()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetErrorMessage()) { - oprot.writeString(struct.errorMessage); - } } @Override @@ -598,11 +493,6 @@ public void read(org.apache.thrift.protocol.TProtocol prot, LockResponse struct) struct.setLockidIsSet(true); struct.state = org.apache.hadoop.hive.metastore.api.LockState.findByValue(iprot.readI32()); struct.setStateIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } } } diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Types.php b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Types.php index e4b0bc726c..92e47ed58b 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Types.php +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Types.php @@ -20750,10 +20750,6 @@ class LockRequest { * @var string */ public $agentInfo = "Unknown"; - /** - * @var bool - */ - public $zeroWaitReadEnabled = false; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -20783,10 +20779,6 @@ class LockRequest { 'var' => 'agentInfo', 'type' => TType::STRING, ), - 6 => array( - 'var' => 'zeroWaitReadEnabled', - 'type' => TType::BOOL, - ), ); } if (is_array($vals)) { @@ -20805,9 +20797,6 @@ class LockRequest { if (isset($vals['agentInfo'])) { $this->agentInfo = $vals['agentInfo']; } - if (isset($vals['zeroWaitReadEnabled'])) { - $this->zeroWaitReadEnabled = $vals['zeroWaitReadEnabled']; - } } } @@ -20876,13 +20865,6 @@ class LockRequest { $xfer += $input->skip($ftype); } break; - case 6: - if ($ftype == TType::BOOL) { - $xfer += $input->readBool($this->zeroWaitReadEnabled); - } else { - $xfer += $input->skip($ftype); 
- } - break; default: $xfer += $input->skip($ftype); break; @@ -20933,11 +20915,6 @@ class LockRequest { $xfer += $output->writeString($this->agentInfo); $xfer += $output->writeFieldEnd(); } - if ($this->zeroWaitReadEnabled !== null) { - $xfer += $output->writeFieldBegin('zeroWaitReadEnabled', TType::BOOL, 6); - $xfer += $output->writeBool($this->zeroWaitReadEnabled); - $xfer += $output->writeFieldEnd(); - } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; @@ -20956,10 +20933,6 @@ class LockResponse { * @var int */ public $state = null; - /** - * @var string - */ - public $errorMessage = null; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -20972,10 +20945,6 @@ class LockResponse { 'var' => 'state', 'type' => TType::I32, ), - 3 => array( - 'var' => 'errorMessage', - 'type' => TType::STRING, - ), ); } if (is_array($vals)) { @@ -20985,9 +20954,6 @@ class LockResponse { if (isset($vals['state'])) { $this->state = $vals['state']; } - if (isset($vals['errorMessage'])) { - $this->errorMessage = $vals['errorMessage']; - } } } @@ -21024,13 +20990,6 @@ class LockResponse { $xfer += $input->skip($ftype); } break; - case 3: - if ($ftype == TType::STRING) { - $xfer += $input->readString($this->errorMessage); - } else { - $xfer += $input->skip($ftype); - } - break; default: $xfer += $input->skip($ftype); break; @@ -21054,11 +21013,6 @@ class LockResponse { $xfer += $output->writeI32($this->state); $xfer += $output->writeFieldEnd(); } - if ($this->errorMessage !== null) { - $xfer += $output->writeFieldBegin('errorMessage', TType::STRING, 3); - $xfer += $output->writeString($this->errorMessage); - $xfer += $output->writeFieldEnd(); - } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py index 
1a0fee319b..eda7817475 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -14421,7 +14421,6 @@ class LockRequest: - user - hostname - agentInfo - - zeroWaitReadEnabled """ thrift_spec = ( @@ -14431,16 +14430,14 @@ class LockRequest: (3, TType.STRING, 'user', None, None, ), # 3 (4, TType.STRING, 'hostname', None, None, ), # 4 (5, TType.STRING, 'agentInfo', None, "Unknown", ), # 5 - (6, TType.BOOL, 'zeroWaitReadEnabled', None, False, ), # 6 ) - def __init__(self, component=None, txnid=None, user=None, hostname=None, agentInfo=thrift_spec[5][4], zeroWaitReadEnabled=thrift_spec[6][4],): + def __init__(self, component=None, txnid=None, user=None, hostname=None, agentInfo=thrift_spec[5][4],): self.component = component self.txnid = txnid self.user = user self.hostname = hostname self.agentInfo = agentInfo - self.zeroWaitReadEnabled = zeroWaitReadEnabled def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -14482,11 +14479,6 @@ def read(self, iprot): self.agentInfo = iprot.readString() else: iprot.skip(ftype) - elif fid == 6: - if ftype == TType.BOOL: - self.zeroWaitReadEnabled = iprot.readBool() - else: - iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -14520,10 +14512,6 @@ def write(self, oprot): oprot.writeFieldBegin('agentInfo', TType.STRING, 5) oprot.writeString(self.agentInfo) oprot.writeFieldEnd() - if self.zeroWaitReadEnabled is not None: - oprot.writeFieldBegin('zeroWaitReadEnabled', TType.BOOL, 6) - oprot.writeBool(self.zeroWaitReadEnabled) - oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -14544,7 +14532,6 @@ def __hash__(self): value = (value * 31) ^ hash(self.user) value = (value * 31) ^ hash(self.hostname) value = (value * 
31) ^ hash(self.agentInfo) - value = (value * 31) ^ hash(self.zeroWaitReadEnabled) return value def __repr__(self): @@ -14563,20 +14550,17 @@ class LockResponse: Attributes: - lockid - state - - errorMessage """ thrift_spec = ( None, # 0 (1, TType.I64, 'lockid', None, None, ), # 1 (2, TType.I32, 'state', None, None, ), # 2 - (3, TType.STRING, 'errorMessage', None, None, ), # 3 ) - def __init__(self, lockid=None, state=None, errorMessage=None,): + def __init__(self, lockid=None, state=None,): self.lockid = lockid self.state = state - self.errorMessage = errorMessage def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -14597,11 +14581,6 @@ def read(self, iprot): self.state = iprot.readI32() else: iprot.skip(ftype) - elif fid == 3: - if ftype == TType.STRING: - self.errorMessage = iprot.readString() - else: - iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -14620,10 +14599,6 @@ def write(self, oprot): oprot.writeFieldBegin('state', TType.I32, 2) oprot.writeI32(self.state) oprot.writeFieldEnd() - if self.errorMessage is not None: - oprot.writeFieldBegin('errorMessage', TType.STRING, 3) - oprot.writeString(self.errorMessage) - oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -14639,7 +14614,6 @@ def __hash__(self): value = 17 value = (value * 31) ^ hash(self.lockid) value = (value * 31) ^ hash(self.state) - value = (value * 31) ^ hash(self.errorMessage) return value def __repr__(self): diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_types.rb b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_types.rb index e6224ec16f..86aa4f3f3b 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_types.rb +++ 
b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_types.rb @@ -3207,15 +3207,13 @@ class LockRequest USER = 3 HOSTNAME = 4 AGENTINFO = 5 - ZEROWAITREADENABLED = 6 FIELDS = { COMPONENT => {:type => ::Thrift::Types::LIST, :name => 'component', :element => {:type => ::Thrift::Types::STRUCT, :class => ::LockComponent}}, TXNID => {:type => ::Thrift::Types::I64, :name => 'txnid', :optional => true}, USER => {:type => ::Thrift::Types::STRING, :name => 'user'}, HOSTNAME => {:type => ::Thrift::Types::STRING, :name => 'hostname'}, - AGENTINFO => {:type => ::Thrift::Types::STRING, :name => 'agentInfo', :default => %q"Unknown", :optional => true}, - ZEROWAITREADENABLED => {:type => ::Thrift::Types::BOOL, :name => 'zeroWaitReadEnabled', :default => false, :optional => true} + AGENTINFO => {:type => ::Thrift::Types::STRING, :name => 'agentInfo', :default => %q"Unknown", :optional => true} } def struct_fields; FIELDS; end @@ -3233,12 +3231,10 @@ class LockResponse include ::Thrift::Struct, ::Thrift::Struct_Union LOCKID = 1 STATE = 2 - ERRORMESSAGE = 3 FIELDS = { LOCKID => {:type => ::Thrift::Types::I64, :name => 'lockid'}, - STATE => {:type => ::Thrift::Types::I32, :name => 'state', :enum_class => ::LockState}, - ERRORMESSAGE => {:type => ::Thrift::Types::STRING, :name => 'errorMessage', :optional => true} + STATE => {:type => ::Thrift::Types::I32, :name => 'state', :enum_class => ::LockState} } def struct_fields; FIELDS; end diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/LockRequestBuilder.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/LockRequestBuilder.java index b43410d44c..93da0f60ec 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/LockRequestBuilder.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/LockRequestBuilder.java @@ -85,11 +85,6 @@ public 
LockRequestBuilder setUser(String user) { return this; } - public LockRequestBuilder setZeroWaitReadEnabled(boolean zeroWaitRead) { - req.setZeroWaitReadEnabled(zeroWaitRead); - return this; - } - /** * Add a lock component to the lock request * @param component to add diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java index 8e1bb4e493..690b1f3c2e 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java @@ -60,7 +60,7 @@ private static boolean inited = false; private static boolean enabled = false; private static Map encryptionZoneToCmrootMapping = new HashMap<>(); - private static HadoopShims hadoopShims = ShimLoader.getHadoopShims(); + private HadoopShims hadoopShims; private static Configuration conf; private String msUser; private String msGroup; @@ -153,6 +153,7 @@ private ReplChangeManager(Configuration conf) throws MetaException { if (MetastoreConf.getBoolVar(conf, ConfVars.REPLCMENABLED)) { ReplChangeManager.enabled = true; ReplChangeManager.conf = conf; + hadoopShims = ShimLoader.getHadoopShims(); cmRootDir = MetastoreConf.getVar(conf, ConfVars.REPLCMDIR); encryptedCmRootDir = MetastoreConf.getVar(conf, ConfVars.REPLCMENCRYPTEDDIR); fallbackNonEncryptedCmRootDir = MetastoreConf.getVar(conf, ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR); @@ -403,7 +404,7 @@ public static FileInfo getFileInfo(Path src, String checksumString, String srcCM */ // TODO: this needs to be enhanced once change management based filesystem is implemented // Currently using fileuri#checksum#cmrooturi#subdirs as the format - public static String encodeFileUri(String fileUriStr, String fileChecksum, String encodedSubDir) + public String 
encodeFileUri(String fileUriStr, String fileChecksum, String encodedSubDir) throws IOException { if (instance == null) { throw new IllegalStateException("Uninitialized ReplChangeManager instance."); @@ -551,7 +552,7 @@ public static String joinWithSeparator(Iterable strings) { } @VisibleForTesting - static Path getCmRoot(Path path) throws IOException { + Path getCmRoot(Path path) throws IOException { Path cmroot = null; //Default path if hive.repl.cm dir is encrypted String cmrootDir = fallbackNonEncryptedCmRootDir; diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 842b7fe53d..9ce0085b0c 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -1200,8 +1200,6 @@ public static ConfVars getMetaConf(String name) { "class is used to store and retrieve transactions and locks"), TXN_TIMEOUT("metastore.txn.timeout", "hive.txn.timeout", 300, TimeUnit.SECONDS, "time after which transactions are declared aborted if the client has not sent a heartbeat."), - TXN_OPENTXN_TIMEOUT("metastore.txn.opentxn.timeout", "hive.txn.opentxn.timeout", 1000, TimeUnit.MILLISECONDS, - "Time before an open transaction operation should persist, otherwise it is considered invalid and rolled back"), URI_RESOLVER("metastore.uri.resolver", "hive.metastore.uri.resolver", "", "If set, fully qualified class name of resolver for hive metastore uri's"), USERS_IN_ADMIN_ROLE("metastore.users.in.admin.role", "hive.users.in.admin.role", "", false, diff --git a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift index 8462b3d7a3..c21923e817 
100644 --- a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift +++ b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift @@ -1080,13 +1080,11 @@ struct LockRequest { 3: required string user, // used in 'show locks' to help admins find who has open locks 4: required string hostname, // used in 'show locks' to help admins find who has open locks 5: optional string agentInfo = "Unknown", - 6: optional bool zeroWaitReadEnabled = false } struct LockResponse { 1: required i64 lockid, 2: required LockState state, - 3: optional string errorMessage } struct CheckLockRequest { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/SQLGenerator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/SQLGenerator.java index 08ef5e912e..49b737ecf9 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/SQLGenerator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/SQLGenerator.java @@ -99,7 +99,7 @@ public SQLGenerator(DatabaseProduct dbProduct, Configuration conf) { } /** - * Generates "Insert into T(a,b,c) values(1,2,'f'),(3,4,'c')" for appropriate DB. + * Generates "Insert into T(a,b,c) values(1,2,'f'),(3,4,'c')" for appropriate DB * * @param tblColumns e.g. "T(a,b,c)" * @param rows e.g. list of Strings like 3,4,'d' @@ -110,7 +110,7 @@ public SQLGenerator(DatabaseProduct dbProduct, Configuration conf) { } /** - * Generates "Insert into T(a,b,c) values(1,2,'f'),(3,4,'c')" for appropriate DB. + * Generates "Insert into T(a,b,c) values(1,2,'f'),(3,4,'c')" for appropriate DB * * @param tblColumns e.g. "T(a,b,c)" * @param rows e.g. 
list of Strings like 3,4,'d' @@ -263,7 +263,7 @@ public String addLimitClause(int numRows, String noSelectsqlQuery) throws MetaEx * @throws SQLException */ public PreparedStatement prepareStmtWithParameters(Connection dbConn, String sql, List parameters) - throws SQLException { + throws SQLException { PreparedStatement pst = dbConn.prepareStatement(addEscapeCharacters(sql)); if ((parameters == null) || parameters.isEmpty()) { return pst; @@ -292,36 +292,4 @@ public String addEscapeCharacters(String s) { return s; } - - /** - * Creates a lock statement for open/commit transaction based on the dbProduct in shared read / exclusive mode. - * @param shared shared or exclusive lock - * @return sql statement to execute - * @throws MetaException if the dbProduct is unknown - */ - public String createTxnLockStatement(boolean shared) throws MetaException{ - String txnLockTable = "TXN_LOCK_TBL"; - switch (dbProduct) { - case MYSQL: - // For Mysql we do not use lock table statement for two reasons - // It is not released automatically on commit/rollback - // It requires to lock every table that will be used by the statement - // https://dev.mysql.com/doc/refman/8.0/en/lock-tables.html - return "SELECT \"TXN_LOCK\" FROM \"" + txnLockTable + "\" " + (shared ? "LOCK IN SHARE MODE" : "FOR UPDATE"); - case POSTGRES: - // https://www.postgresql.org/docs/9.4/sql-lock.html - case DERBY: - // https://db.apache.org/derby/docs/10.4/ref/rrefsqlj40506.html - case ORACLE: - // https://docs.oracle.com/cd/B19306_01/server.102/b14200/statements_9015.htm - return "LOCK TABLE \"" + txnLockTable + "\" IN " + (shared ? "SHARE" : "EXCLUSIVE") + " MODE"; - case SQLSERVER: - // https://docs.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver15 - return "SELECT * FROM \"" + txnLockTable + "\" WITH (" + (shared ? 
"TABLOCK" : "TABLOCKX") + ", HOLDLOCK)"; - default: - String msg = "Unrecognized database product name <" + dbProduct + ">"; - LOG.error(msg); - throw new MetaException(msg); - } - } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java index a1bc10955a..2344c2d5f6 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java @@ -223,7 +223,7 @@ public void markCompacted(CompactionInfo info) throws MetaException { stmt = dbConn.createStatement(); String s = "UPDATE \"COMPACTION_QUEUE\" SET \"CQ_STATE\" = '" + READY_FOR_CLEANING + "', " + "\"CQ_WORKER_ID\" = NULL, \"CQ_NEXT_TXN_ID\" = " - + "(SELECT MAX(\"TXN_ID\") + 1 FROM \"TXNS\")" + + "(SELECT \"NTXN_NEXT\" FROM \"NEXT_TXN_ID\")" + " WHERE \"CQ_ID\" = " + info.id; LOG.debug("Going to execute update <" + s + ">"); int updCnt = stmt.executeUpdate(s); @@ -474,7 +474,7 @@ public void markCleaned(CompactionInfo info) throws MetaException { } /** * Clean up entries from TXN_TO_WRITE_ID table less than min_uncommited_txnid as found by - * min(max(TXNS.txn_id), min(WRITE_SET.WS_COMMIT_ID), min(Aborted TXNS.txn_id)). + * min(NEXT_TXN_ID.ntxn_next, min(WRITE_SET.WS_COMMIT_ID), min(Aborted TXNS.txn_id)). */ @Override @RetrySemantics.SafeToRetry @@ -492,9 +492,9 @@ public void cleanTxnToWriteIdTable() throws MetaException { // First need to find the min_uncommitted_txnid which is currently seen by any open transactions. // If there are no txns which are currently open or aborted in the system, then current value of - // max(TXNS.txn_id) could be min_uncommitted_txnid. + // NEXT_TXN_ID.ntxn_next could be min_uncommitted_txnid. 
String s = "SELECT MIN(\"RES\".\"ID\") AS \"ID\" FROM (" + - "SELECT MAX(\"TXN_ID\") + 1 AS \"ID\" FROM \"TXNS\" " + + "SELECT MIN(\"NTXN_NEXT\") AS \"ID\" FROM \"NEXT_TXN_ID\" " + "UNION " + "SELECT MIN(\"WS_COMMIT_ID\") AS \"ID\" FROM \"WRITE_SET\" " + "UNION " + @@ -504,7 +504,7 @@ public void cleanTxnToWriteIdTable() throws MetaException { LOG.debug("Going to execute query <" + s + ">"); rs = stmt.executeQuery(s); if (!rs.next()) { - throw new MetaException("Transaction tables not properly initialized, no record found in TXNS"); + throw new MetaException("Transaction tables not properly initialized, no record found in NEXT_TXN_ID"); } long minUncommitedTxnid = rs.getLong(1); @@ -533,36 +533,25 @@ public void cleanTxnToWriteIdTable() throws MetaException { } /** - * Clean up aborted / committed transactions from txns that have no components in txn_components. - * The committed txns are left there for TXN_OPENTXN_TIMEOUT window period intentionally. - * The reason such aborted txns exist can be that now work was done in this txn - * (e.g. Streaming opened TransactionBatch and abandoned it w/o doing any work) - * or due to {@link #markCleaned(CompactionInfo)} being called. + * Clean up aborted transactions from txns that have no components in txn_components. The reason such + * txns exist can be that now work was done in this txn (e.g. Streaming opened TransactionBatch and + * abandoned it w/o doing any work) or due to {@link #markCleaned(CompactionInfo)} being called. 
*/ @Override @RetrySemantics.SafeToRetry - public void cleanEmptyAbortedAndCommittedTxns() throws MetaException { - LOG.info("Start to clean empty aborted or committed TXNS"); + public void cleanEmptyAbortedTxns() throws MetaException { try { Connection dbConn = null; Statement stmt = null; ResultSet rs = null; try { - //Aborted and committed are terminal states, so nothing about the txn can change + //Aborted is a terminal state, so nothing about the txn can change //after that, so READ COMMITTED is sufficient. dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED); stmt = dbConn.createStatement(); - /** - * Only delete aborted / committed transaction in a way that guarantees two things: - * 1. never deletes anything that is inside the TXN_OPENTXN_TIMEOUT window - * 2. never deletes the maximum txnId even if it is before the TXN_OPENTXN_TIMEOUT window - */ - long lowWaterMark = getOpenTxnTimeoutLowBoundaryTxnId(dbConn); - String s = "SELECT \"TXN_ID\" FROM \"TXNS\" WHERE " + "\"TXN_ID\" NOT IN (SELECT \"TC_TXNID\" FROM \"TXN_COMPONENTS\") AND " + - " (\"TXN_STATE\" = '" + TXN_ABORTED + "' OR \"TXN_STATE\" = '" + TXN_COMMITTED + "') AND " - + " \"TXN_ID\" < " + lowWaterMark; + "\"TXN_STATE\" = '" + TXN_ABORTED + "'"; LOG.debug("Going to execute query <" + s + ">"); rs = stmt.executeQuery(s); List txnids = new ArrayList<>(); @@ -579,15 +568,16 @@ public void cleanEmptyAbortedAndCommittedTxns() throws MetaException { // Delete from TXNS. 
prefix.append("DELETE FROM \"TXNS\" WHERE "); + suffix.append(""); TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, txnids, "\"TXN_ID\"", false, false); for (String query : queries) { LOG.debug("Going to execute update <" + query + ">"); int rc = stmt.executeUpdate(query); - LOG.debug("Removed " + rc + " empty Aborted and Committed transactions from TXNS"); + LOG.info("Removed " + rc + " empty Aborted transactions from TXNS"); } - LOG.info("Aborted and committed transactions removed from TXNS: " + txnids); + LOG.info("Aborted transactions removed from TXNS: " + txnids); LOG.debug("Going to commit"); dbConn.commit(); } catch (SQLException e) { @@ -601,7 +591,7 @@ public void cleanEmptyAbortedAndCommittedTxns() throws MetaException { close(rs, stmt, dbConn); } } catch (RetryException e) { - cleanEmptyAbortedAndCommittedTxns(); + cleanEmptyAbortedTxns(); } } @@ -1157,12 +1147,12 @@ public long findMinOpenTxnIdForCleaner() throws MetaException{ + quoteChar(READY_FOR_CLEANING) + ") \"RES\""; } else { - query = "SELECT MAX(\"TXN_ID\") + 1 FROM \"TXNS\""; + query = "SELECT \"NTXN_NEXT\" FROM \"NEXT_TXN_ID\""; } LOG.debug("Going to execute query <" + query + ">"); rs = stmt.executeQuery(query); if (!rs.next()) { - throw new MetaException("Transaction tables not properly initialized, no record found in TXNS"); + throw new MetaException("Transaction tables not properly initialized, no record found in NEXT_TXN_ID"); } return rs.getLong(1); } catch (SQLException e) { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java index 7a90316f3c..97a083399a 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java @@ -218,107 +218,63 @@ 
private static String ensurePathEndsInSlash(String path) { public static void cleanDb(Configuration conf) throws Exception { LOG.info("Cleaning transactional tables"); - - boolean success = true; - Connection conn = null; - Statement stmt = null; - try { - conn = getConnection(conf); - stmt = conn.createStatement(); - if (!checkDbPrepared(stmt)){ - // Nothing to clean - return; - } - - // We want to try these, whether they succeed or fail. - success &= truncateTable(conn, stmt, "TXN_COMPONENTS"); - success &= truncateTable(conn, stmt, "COMPLETED_TXN_COMPONENTS"); - success &= truncateTable(conn, stmt, "TXNS"); - success &= truncateTable(conn, stmt, "TXN_TO_WRITE_ID"); - success &= truncateTable(conn, stmt, "NEXT_WRITE_ID"); - success &= truncateTable(conn, stmt, "HIVE_LOCKS"); - success &= truncateTable(conn, stmt, "NEXT_LOCK_ID"); - success &= truncateTable(conn, stmt, "COMPACTION_QUEUE"); - success &= truncateTable(conn, stmt, "NEXT_COMPACTION_QUEUE_ID"); - success &= truncateTable(conn, stmt, "COMPLETED_COMPACTIONS"); - success &= truncateTable(conn, stmt, "AUX_TABLE"); - success &= truncateTable(conn, stmt, "WRITE_SET"); - success &= truncateTable(conn, stmt, "REPL_TXN_MAP"); - success &= truncateTable(conn, stmt, "MATERIALIZATION_REBUILD_LOCKS"); + int retryCount = 0; + while(++retryCount <= 3) { + boolean success = true; + Connection conn = null; + Statement stmt = null; try { - resetTxnSequence(conn, stmt); - stmt.executeUpdate("INSERT INTO \"NEXT_LOCK_ID\" VALUES(1)"); - stmt.executeUpdate("INSERT INTO \"NEXT_COMPACTION_QUEUE_ID\" VALUES(1)"); - } catch (SQLException e) { - if (!getTableNotExistsErrorCodes().contains(e.getSQLState())) { - LOG.error("Error initializing sequence values", e); - success = false; + conn = getConnection(conf); + stmt = conn.createStatement(); + + // We want to try these, whether they succeed or fail. 
+ success &= truncateTable(conn, stmt, "TXN_COMPONENTS"); + success &= truncateTable(conn, stmt, "COMPLETED_TXN_COMPONENTS"); + success &= truncateTable(conn, stmt, "TXNS"); + success &= truncateTable(conn, stmt, "NEXT_TXN_ID"); + success &= truncateTable(conn, stmt, "TXN_TO_WRITE_ID"); + success &= truncateTable(conn, stmt, "NEXT_WRITE_ID"); + success &= truncateTable(conn, stmt, "HIVE_LOCKS"); + success &= truncateTable(conn, stmt, "NEXT_LOCK_ID"); + success &= truncateTable(conn, stmt, "COMPACTION_QUEUE"); + success &= truncateTable(conn, stmt, "NEXT_COMPACTION_QUEUE_ID"); + success &= truncateTable(conn, stmt, "COMPLETED_COMPACTIONS"); + success &= truncateTable(conn, stmt, "AUX_TABLE"); + success &= truncateTable(conn, stmt, "WRITE_SET"); + success &= truncateTable(conn, stmt, "REPL_TXN_MAP"); + success &= truncateTable(conn, stmt, "MATERIALIZATION_REBUILD_LOCKS"); + try { + stmt.executeUpdate("INSERT INTO \"NEXT_TXN_ID\" VALUES(1)"); + stmt.executeUpdate("INSERT INTO \"NEXT_LOCK_ID\" VALUES(1)"); + stmt.executeUpdate("INSERT INTO \"NEXT_COMPACTION_QUEUE_ID\" VALUES(1)"); + } catch (SQLException e) { + if (!getTableNotExistsErrorCodes().contains(e.getSQLState())) { + LOG.error("Error initializing NEXT_TXN_ID"); + success = false; + } } + /* + * Don't drop NOTIFICATION_LOG, SEQUENCE_TABLE and NOTIFICATION_SEQUENCE as its used by other + * table which are not txn related to generate primary key. So if these tables are dropped + * and other tables are not dropped, then it will create key duplicate error while inserting + * to other table. + */ + } finally { + closeResources(conn, stmt, null); + } + if(success) { + return; } - /* - * Don't drop NOTIFICATION_LOG, SEQUENCE_TABLE and NOTIFICATION_SEQUENCE as its used by other - * table which are not txn related to generate primary key. So if these tables are dropped - * and other tables are not dropped, then it will create key duplicate error while inserting - * to other table. 
- */ - } finally { - closeResources(conn, stmt, null); - } - if(success) { - return; } throw new RuntimeException("Failed to clean up txn tables"); } - private static void resetTxnSequence(Connection conn, Statement stmt) throws SQLException, MetaException{ - String dbProduct = conn.getMetaData().getDatabaseProductName(); - DatabaseProduct databaseProduct = determineDatabaseProduct(dbProduct); - switch (databaseProduct) { - - case DERBY: - stmt.execute("ALTER TABLE \"TXNS\" ALTER \"TXN_ID\" RESTART WITH 1"); - stmt.execute("INSERT INTO \"TXNS\" (\"TXN_ID\", \"TXN_STATE\", \"TXN_STARTED\"," - + " \"TXN_LAST_HEARTBEAT\", \"TXN_USER\", \"TXN_HOST\")" - + " VALUES(0, 'c', 0, 0, '', '')"); - break; - case MYSQL: - stmt.execute("ALTER TABLE \"TXNS\" AUTO_INCREMENT=1"); - stmt.execute("SET SQL_MODE='NO_AUTO_VALUE_ON_ZERO,ANSI_QUOTES'"); - stmt.execute("INSERT INTO \"TXNS\" (\"TXN_ID\", \"TXN_STATE\", \"TXN_STARTED\"," - + " \"TXN_LAST_HEARTBEAT\", \"TXN_USER\", \"TXN_HOST\")" - + " VALUES(0, 'c', 0, 0, '', '')"); - break; - case POSTGRES: - stmt.execute("INSERT INTO \"TXNS\" (\"TXN_ID\", \"TXN_STATE\", \"TXN_STARTED\"," - + " \"TXN_LAST_HEARTBEAT\", \"TXN_USER\", \"TXN_HOST\")" - + " VALUES(0, 'c', 0, 0, '', '')"); - stmt.execute("ALTER SEQUENCE \"TXNS_TXN_ID_seq\" RESTART"); - break; - case ORACLE: - stmt.execute("ALTER TABLE \"TXNS\" MODIFY \"TXN_ID\" GENERATED BY DEFAULT AS IDENTITY (START WITH 1)"); - stmt.execute("INSERT INTO \"TXNS\" (\"TXN_ID\", \"TXN_STATE\", \"TXN_STARTED\"," - + " \"TXN_LAST_HEARTBEAT\", \"TXN_USER\", \"TXN_HOST\")" - + " VALUES(0, 'c', 0, 0, '_', '_')"); - break; - case SQLSERVER: - stmt.execute("DBCC CHECKIDENT ('txns', RESEED, 0)"); - stmt.execute("SET IDENTITY_INSERT TXNS ON"); - stmt.execute("INSERT INTO \"TXNS\" (\"TXN_ID\", \"TXN_STATE\", \"TXN_STARTED\"," - + " \"TXN_LAST_HEARTBEAT\", \"TXN_USER\", \"TXN_HOST\")" - + " VALUES(0, 'c', 0, 0, '', '')"); - break; - case OTHER: - default: - break; - } - } - private static boolean 
truncateTable(Connection conn, Statement stmt, String name) { try { // We can not use actual truncate due to some foreign keys, but we don't expect much data during tests String dbProduct = conn.getMetaData().getDatabaseProductName(); DatabaseProduct databaseProduct = determineDatabaseProduct(dbProduct); - if (databaseProduct == POSTGRES || databaseProduct == MYSQL) { + if (databaseProduct == POSTGRES) { stmt.execute("DELETE FROM \"" + name + "\""); } else { stmt.execute("DELETE FROM " + name); @@ -547,28 +503,4 @@ static void executeQueriesInBatchNoCount(DatabaseProduct dbProduct, Statement st } return affectedRowsByQuery; } - - /** - + * Checks if the dbms supports the getGeneratedKeys for multiline insert statements. - + * @param dbProduct DBMS type - + * @return true if supports - + * @throws MetaException - + */ - public static boolean supportsGetGeneratedKeys(DatabaseProduct dbProduct) throws MetaException { - switch (dbProduct) { - case DERBY: - case SQLSERVER: - // The getGeneratedKeys is not supported for multi line insert - return false; - case ORACLE: - case MYSQL: - case POSTGRES: - return true; - case OTHER: - default: - String msg = "Unknown database product: " + dbProduct.toString(); - LOG.error(msg); - throw new MetaException(msg); - } - } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java index 8fded608d0..fe39b0b36e 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java @@ -55,7 +55,6 @@ import javax.sql.DataSource; -import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.NotImplementedException; import 
org.apache.commons.lang3.tuple.Pair; @@ -74,59 +73,7 @@ import org.apache.hadoop.hive.metastore.MetaStoreListenerNotifier; import org.apache.hadoop.hive.metastore.TransactionalMetaStoreEventListener; import org.apache.hadoop.hive.metastore.LockTypeComparator; -import org.apache.hadoop.hive.metastore.api.AbortTxnRequest; -import org.apache.hadoop.hive.metastore.api.AbortTxnsRequest; -import org.apache.hadoop.hive.metastore.api.AddDynamicPartitions; -import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsRequest; -import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsResponse; -import org.apache.hadoop.hive.metastore.api.CheckLockRequest; -import org.apache.hadoop.hive.metastore.api.CommitTxnRequest; -import org.apache.hadoop.hive.metastore.api.CompactionRequest; -import org.apache.hadoop.hive.metastore.api.CompactionResponse; -import org.apache.hadoop.hive.metastore.api.CompactionType; -import org.apache.hadoop.hive.metastore.api.CreationMetadata; -import org.apache.hadoop.hive.metastore.api.DataOperationType; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse; -import org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse; -import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest; -import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsResponse; -import org.apache.hadoop.hive.metastore.api.HeartbeatRequest; -import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeRequest; -import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse; -import org.apache.hadoop.hive.metastore.api.HiveObjectType; -import org.apache.hadoop.hive.metastore.api.InitializeTableWriteIdsRequest; -import org.apache.hadoop.hive.metastore.api.LockComponent; -import org.apache.hadoop.hive.metastore.api.LockRequest; -import org.apache.hadoop.hive.metastore.api.LockResponse; -import 
org.apache.hadoop.hive.metastore.api.LockState; -import org.apache.hadoop.hive.metastore.api.LockType; -import org.apache.hadoop.hive.metastore.api.Materialization; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchLockException; -import org.apache.hadoop.hive.metastore.api.NoSuchTxnException; -import org.apache.hadoop.hive.metastore.api.OpenTxnRequest; -import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.ReplLastIdInfo; -import org.apache.hadoop.hive.metastore.api.ReplTblWriteIdStateRequest; -import org.apache.hadoop.hive.metastore.api.ShowCompactRequest; -import org.apache.hadoop.hive.metastore.api.ShowCompactResponse; -import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement; -import org.apache.hadoop.hive.metastore.api.ShowLocksRequest; -import org.apache.hadoop.hive.metastore.api.ShowLocksResponse; -import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.api.TableValidWriteIds; -import org.apache.hadoop.hive.metastore.api.TxnAbortedException; -import org.apache.hadoop.hive.metastore.api.TxnInfo; -import org.apache.hadoop.hive.metastore.api.TxnOpenException; -import org.apache.hadoop.hive.metastore.api.TxnState; -import org.apache.hadoop.hive.metastore.api.TxnToWriteId; -import org.apache.hadoop.hive.metastore.api.TxnType; -import org.apache.hadoop.hive.metastore.api.UnlockRequest; -import org.apache.hadoop.hive.metastore.api.WriteEventInfo; +import org.apache.hadoop.hive.metastore.api.*; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; import org.apache.hadoop.hive.metastore.datasource.DataSourceProvider; @@ -227,12 +174,8 @@ static final protected char MINOR_TYPE = 'i'; // Transaction states - 
protected static final char TXN_ABORTED = 'a'; - protected static final char TXN_OPEN = 'o'; - protected static final char TXN_COMMITTED = 'c'; - - private static final char TXN_TMP = '_'; - + static final protected char TXN_ABORTED = 'a'; + static final protected char TXN_OPEN = 'o'; //todo: make these like OperationType and remove above char constants enum TxnStatus {OPEN, ABORTED, COMMITTED, UNKNOWN} @@ -246,8 +189,6 @@ private static DataSource connPool; private static DataSource connPoolMutex; - private static final String MANUAL_RETRY = "ManualRetry"; - // Query definitions private static final String HIVE_LOCKS_INSERT_QRY = "INSERT INTO \"HIVE_LOCKS\" ( " + "\"HL_LOCK_EXT_ID\", \"HL_LOCK_INT_ID\", \"HL_TXNID\", \"HL_DB\", \"HL_TABLE\", \"HL_PARTITION\", " + @@ -263,14 +204,11 @@ private static final String COMPL_TXN_COMPONENTS_INSERT_QUERY = "INSERT INTO \"COMPLETED_TXN_COMPONENTS\" " + "(\"CTC_TXNID\"," + " \"CTC_DATABASE\", \"CTC_TABLE\", \"CTC_PARTITION\", \"CTC_WRITEID\", \"CTC_UPDATE_DELETE\")" + " VALUES (%s, ?, ?, ?, ?, %s)"; - private static final String TXNS_INSERT_QRY = "INSERT INTO \"TXNS\" " + - "(\"TXN_STATE\", \"TXN_STARTED\", \"TXN_LAST_HEARTBEAT\", \"TXN_USER\", \"TXN_HOST\", \"TXN_TYPE\") " - + "VALUES(?,%s,%s,?,?,?)"; private static final String SELECT_LOCKS_FOR_LOCK_ID_QUERY = "SELECT \"HL_LOCK_EXT_ID\", \"HL_LOCK_INT_ID\", " + - "\"HL_DB\", \"HL_TABLE\", \"HL_PARTITION\", \"HL_LOCK_STATE\", \"HL_LOCK_TYPE\", \"HL_TXNID\" " + - "FROM \"HIVE_LOCKS\" WHERE \"HL_LOCK_EXT_ID\" = ?"; + "\"HL_DB\", \"HL_TABLE\", \"HL_PARTITION\", \"HL_LOCK_STATE\", \"HL_LOCK_TYPE\", \"HL_TXNID\" " + + "FROM \"HIVE_LOCKS\" WHERE \"HL_LOCK_EXT_ID\" = ?"; private static final String SELECT_TIMED_OUT_LOCKS_QUERY = "SELECT DISTINCT \"HL_LOCK_EXT_ID\" FROM \"HIVE_LOCKS\" " + - "WHERE \"HL_LAST_HEARTBEAT\" < %s - ? AND \"HL_TXNID\" = 0"; + "WHERE \"HL_LAST_HEARTBEAT\" < %s - ? 
AND \"HL_TXNID\" = 0"; private List transactionalListeners; @@ -335,11 +273,9 @@ char getSqlConst() { protected Configuration conf; private static DatabaseProduct dbProduct; private static SQLGenerator sqlGenerator; - private static long openTxnTimeOutMillis; // (End user) Transaction timeout, in milliseconds. private long timeout; - // Timeout for opening a transaction private int maxBatchSize; private String identifierQuoteString; // quotes to use for quoting tables, where necessary @@ -421,8 +357,6 @@ public void setConf(Configuration conf){ maxOpenTxns = MetastoreConf.getIntVar(conf, ConfVars.MAX_OPEN_TXNS); maxBatchSize = MetastoreConf.getIntVar(conf, ConfVars.JDBC_MAX_BATCH_SIZE); - openTxnTimeOutMillis = MetastoreConf.getTimeVar(conf, ConfVars.TXN_OPENTXN_TIMEOUT, TimeUnit.MILLISECONDS); - try { transactionalListeners = MetaStoreServerUtils.getMetaStoreListeners( TransactionalMetaStoreEventListener.class, @@ -443,74 +377,59 @@ public Configuration getConf() { @RetrySemantics.ReadOnly public GetOpenTxnsInfoResponse getOpenTxnsInfo() throws MetaException { try { - // We need to figure out the HighWaterMark and the list of open transactions. + // We need to figure out the current transaction number and the list of + // open transactions. To avoid needing a transaction on the underlying + // database we'll look at the current transaction number first. If it + // subsequently shows up in the open list that's ok. Connection dbConn = null; Statement stmt = null; ResultSet rs = null; try { - /* - * This method need guarantees from - * {@link #openTxns(OpenTxnRequest)} and {@link #commitTxn(CommitTxnRequest)}. - * It will look at the TXNS table and find each transaction between the max(txn_id) as HighWaterMark - * and the max(txn_id) before the TXN_OPENTXN_TIMEOUT period as LowWaterMark. - * Every transaction that is not found between these will be considered as open, since it may appear later. 
- * openTxns must ensure, that no new transaction will be opened with txn_id below LWM and - * commitTxn must ensure, that no committed transaction will be removed before the time period expires. + /** + * This method can run at READ_COMMITTED as long as long as + * {@link #openTxns(org.apache.hadoop.hive.metastore.api.OpenTxnRequest)} is atomic. + * More specifically, as long as advancing TransactionID in NEXT_TXN_ID is atomic with + * adding corresponding entries into TXNS. The reason is that any txnid below HWM + * is either in TXNS and thus considered open (Open/Aborted) or it's considered Committed. */ dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED); stmt = dbConn.createStatement(); + String s = "SELECT \"NTXN_NEXT\" - 1 FROM \"NEXT_TXN_ID\""; + LOG.debug("Going to execute query <" + s + ">"); + rs = stmt.executeQuery(s); + if (!rs.next()) { + throw new MetaException("Transaction tables not properly " + + "initialized, no record found in next_txn_id"); + } + long hwm = rs.getLong(1); + if (rs.wasNull()) { + throw new MetaException("Transaction tables not properly " + + "initialized, null record found in next_txn_id"); + } + close(rs); List txnInfos = new ArrayList<>(); - - String s = - "SELECT \"TXN_ID\", \"TXN_STATE\", \"TXN_USER\", \"TXN_HOST\", \"TXN_STARTED\", \"TXN_LAST_HEARTBEAT\", " - + "(" + TxnDbUtil.getEpochFn(dbProduct) + " - \"TXN_STARTED\")" - + "FROM \"TXNS\" ORDER BY \"TXN_ID\""; + //need the WHERE clause below to ensure consistent results with READ_COMMITTED + s = "SELECT \"TXN_ID\", \"TXN_STATE\", \"TXN_USER\", \"TXN_HOST\", \"TXN_STARTED\", \"TXN_LAST_HEARTBEAT\" FROM " + + "\"TXNS\" WHERE \"TXN_ID\" <= " + hwm; LOG.debug("Going to execute query<" + s + ">"); rs = stmt.executeQuery(s); - /* - * We can use the maximum txn_id from the TXNS table as high water mark, since the commitTxn and the Initiator - * guarantees, that the transaction with the highest txn_id will never be removed from the TXNS table. 
- * If there is a pending openTxns, that is already acquired it's sequenceId but not yet committed the insert - * into the TXNS table, will have either a lower txn_id than HWM and will be listed in the openTxn list, - * or will have a higher txn_id and don't effect this getOpenTxns() call. - */ - long hwm = 0; - long openTxnLowBoundary = 0; - while (rs.next()) { - long txnId = rs.getLong(1); - long age = rs.getLong(7); - hwm = txnId; - if (age < getOpenTxnTimeOutMillis()) { - // We will consider every gap as an open transaction from the previous txnId - openTxnLowBoundary++; - while (txnId > openTxnLowBoundary) { - // Add an empty open transaction for every missing value - txnInfos.add(new TxnInfo(openTxnLowBoundary, TxnState.OPEN, null, null)); - openTxnLowBoundary++; - } - } else { - openTxnLowBoundary = txnId; - } char c = rs.getString(2).charAt(0); TxnState state; switch (c) { - case TXN_COMMITTED: - // This is only here, to avoid adding this txnId as possible gap - continue; + case TXN_ABORTED: + state = TxnState.ABORTED; + break; - case TXN_ABORTED: - state = TxnState.ABORTED; - break; - - case TXN_OPEN: - state = TxnState.OPEN; - break; + case TXN_OPEN: + state = TxnState.OPEN; + break; - default: - throw new MetaException("Unexpected transaction state " + c + " found in txns table"); + default: + throw new MetaException("Unexpected transaction state " + c + + " found in txns table"); } - TxnInfo txnInfo = new TxnInfo(txnId, state, rs.getString(3), rs.getString(4)); + TxnInfo txnInfo = new TxnInfo(rs.getLong(1), state, rs.getString(3), rs.getString(4)); txnInfo.setStartedTime(rs.getLong(5)); txnInfo.setLastHeartbeatTime(rs.getLong(6)); txnInfos.add(txnInfo); @@ -522,8 +441,8 @@ public GetOpenTxnsInfoResponse getOpenTxnsInfo() throws MetaException { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); checkRetryable(dbConn, e, "getOpenTxnsInfo"); - throw new MetaException( - "Unable to select from transaction database: " + getMessage(e) + 
StringUtils.stringifyException(e)); + throw new MetaException("Unable to select from transaction database: " + getMessage(e) + + StringUtils.stringifyException(e)); } finally { close(rs, stmt, dbConn); } @@ -536,47 +455,42 @@ public GetOpenTxnsInfoResponse getOpenTxnsInfo() throws MetaException { @RetrySemantics.ReadOnly public GetOpenTxnsResponse getOpenTxns() throws MetaException { try { - // We need to figure out the current transaction number and the list of open transactions. + // We need to figure out the current transaction number and the list of + // open transactions. To avoid needing a transaction on the underlying + // database we'll look at the current transaction number first. If it + // subsequently shows up in the open list that's ok. Connection dbConn = null; Statement stmt = null; ResultSet rs = null; try { - /* + /** * This runs at READ_COMMITTED for exactly the same reason as {@link #getOpenTxnsInfo()} */ dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED); stmt = dbConn.createStatement(); - + String s = "SELECT \"NTXN_NEXT\" - 1 FROM \"NEXT_TXN_ID\""; + LOG.debug("Going to execute query <" + s + ">"); + rs = stmt.executeQuery(s); + if (!rs.next()) { + throw new MetaException("Transaction tables not properly " + + "initialized, no record found in next_txn_id"); + } + long hwm = rs.getLong(1); + if (rs.wasNull()) { + throw new MetaException("Transaction tables not properly " + + "initialized, null record found in next_txn_id"); + } + close(rs); List openList = new ArrayList<>(); - String s = "SELECT \"TXN_ID\", \"TXN_STATE\", \"TXN_TYPE\", " - + "(" + TxnDbUtil.getEpochFn(dbProduct) + " - \"TXN_STARTED\")" - + " FROM \"TXNS\" ORDER BY \"TXN_ID\""; + //need the WHERE clause below to ensure consistent results with READ_COMMITTED + s = "SELECT \"TXN_ID\", \"TXN_STATE\", \"TXN_TYPE\" FROM \"TXNS\" WHERE \"TXN_ID\" <= " + hwm + " ORDER BY \"TXN_ID\""; LOG.debug("Going to execute query<" + s + ">"); rs = stmt.executeQuery(s); - long hwm = 0; - 
long openTxnLowBoundary = 0; long minOpenTxn = Long.MAX_VALUE; BitSet abortedBits = new BitSet(); while (rs.next()) { long txnId = rs.getLong(1); - long age = rs.getLong(4); - hwm = txnId; - if (age < getOpenTxnTimeOutMillis()) { - // We will consider every gap as an open transaction from the previous txnId - openTxnLowBoundary++; - while (txnId > openTxnLowBoundary) { - // Add an empty open transaction for every missing value - openList.add(openTxnLowBoundary); - minOpenTxn = Math.min(minOpenTxn, openTxnLowBoundary); - openTxnLowBoundary++; - } - } else { - openTxnLowBoundary = txnId; - } char txnState = rs.getString(2).charAt(0); - if (txnState == TXN_COMMITTED) { - continue; - } if (txnState == TXN_OPEN) { minOpenTxn = Math.min(minOpenTxn, txnId); } @@ -592,7 +506,7 @@ public GetOpenTxnsResponse getOpenTxns() throws MetaException { dbConn.rollback(); ByteBuffer byteBuffer = ByteBuffer.wrap(abortedBits.toByteArray()); GetOpenTxnsResponse otr = new GetOpenTxnsResponse(hwm, openList, byteBuffer); - if (minOpenTxn < Long.MAX_VALUE) { + if(minOpenTxn < Long.MAX_VALUE) { otr.setMin_open_txn(minOpenTxn); } return otr; @@ -640,20 +554,13 @@ public OpenTxnsResponse openTxns(OpenTxnRequest rqst) throws MetaException { Connection dbConn = null; Statement stmt = null; try { - /* + lockInternal(); + /** * To make {@link #getOpenTxns()}/{@link #getOpenTxnsInfo()} work correctly, this operation must ensure - * that looking at the TXNS table every open transaction could be identified below a given High Water Mark. - * One way to do it, would be to serialize the openTxns call with a S4U lock, but that would cause - * performance degradation with high transaction load. - * To enable parallel openTxn calls, we define a time period (TXN_OPENTXN_TIMEOUT) and consider every - * transaction missing from the TXNS table in that period open, and prevent opening transaction outside - * the period. - * Example: At t[0] there is one open transaction in the TXNS table, T[1]. 
- * T[2] acquires the next sequence at t[1] but only commits into the TXNS table at t[10]. - * T[3] acquires its sequence at t[2], and commits into the TXNS table at t[3]. - * Then T[3] calculates it’s snapshot at t[4] and puts T[1] and also T[2] in the snapshot’s - * open transaction list. T[1] because it is presented as open in TXNS, - * T[2] because it is a missing sequence. + * that advancing the counter in NEXT_TXN_ID and adding appropriate entries to TXNS is atomic. + * Also, advancing the counter must work when multiple metastores are running. + * SELECT ... FOR UPDATE is used to prevent + * concurrent DB transactions being rolled back due to Write-Write conflict on NEXT_TXN_ID. * * In the current design, there can be several metastore instances running in a given Warehouse. * This makes ideas like reserving a range of IDs to save trips to DB impossible. For example, @@ -666,67 +573,35 @@ public OpenTxnsResponse openTxns(OpenTxnRequest rqst) throws MetaException { * set could support a write-through cache for added performance. */ dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED); - stmt = dbConn.createStatement(); - /* - * The openTxn and commitTxn must be mutexed, when committing a not read only transaction. - * This is achieved by requesting a shared table lock here, and an exclusive one at commit. - * Since table locks are working in Derby, we don't need the lockInternal call here. - * Example: Suppose we have two transactions with update like x = x+1. - * We have T[3,3] that was using a value from a snapshot with T[2,2]. 
If we allow committing T[3,3] - * and opening T[4] parallel it is possible, that T[4] will be using the value from a snapshot with T[2,2], - * and we will have a lost update problem - */ - acquireTxnLock(stmt, true); - // Measure the time from acquiring the sequence value, till committing in the TXNS table - StopWatch generateTransactionWatch = new StopWatch(); - generateTransactionWatch.start(); + // Make sure the user has not requested an insane amount of txns. + int maxTxns = MetastoreConf.getIntVar(conf, ConfVars.TXN_MAX_OPEN_BATCH); + if (numTxns > maxTxns) numTxns = maxTxns; - List txnIds = openTxns(dbConn, rqst); + stmt = dbConn.createStatement(); + List txnIds = openTxns(dbConn, stmt, rqst); LOG.debug("Going to commit"); dbConn.commit(); - generateTransactionWatch.stop(); - long elapsedMillis = generateTransactionWatch.getTime(TimeUnit.MILLISECONDS); - TxnType txnType = rqst.isSetTxn_type() ? rqst.getTxn_type() : TxnType.DEFAULT; - if (txnType != TxnType.READ_ONLY && elapsedMillis >= openTxnTimeOutMillis) { - /* - * The commit was too slow, we can not allow this to continue (except if it is read only, - * since that can not cause dirty reads). - * When calculating the snapshot for a given transaction, we look back for possible open transactions - * (that are not yet committed in the TXNS table), for TXN_OPENTXN_TIMEOUT period. - * We can not allow a write transaction, that was slower than TXN_OPENTXN_TIMEOUT to continue, - * because there can be other transactions running, that didn't considered this transactionId open, - * this could cause dirty reads. 
- */ - LOG.error("OpenTxnTimeOut exceeded commit duration {}, deleting transactionIds: {}", elapsedMillis, txnIds); - deleteInvalidOpenTransactions(dbConn, txnIds); - /* - * We do not throw RetryException directly, to not circumvent the max retry limit - */ - throw new SQLException("OpenTxnTimeOut exceeded", MANUAL_RETRY); - } return new OpenTxnsResponse(txnIds); } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); checkRetryable(dbConn, e, "openTxns(" + rqst + ")"); - throw new MetaException("Unable to select from transaction database " + StringUtils.stringifyException(e)); + throw new MetaException("Unable to select from transaction database " + + StringUtils.stringifyException(e)); } finally { close(null, stmt, dbConn); + unlockInternal(); } } catch (RetryException e) { return openTxns(rqst); } } - private List openTxns(Connection dbConn, OpenTxnRequest rqst) + private List openTxns(Connection dbConn, Statement stmt, OpenTxnRequest rqst) throws SQLException, MetaException { int numTxns = rqst.getNum_txns(); - // Make sure the user has not requested an insane amount of txns. - int maxTxns = MetastoreConf.getIntVar(conf, ConfVars.TXN_MAX_OPEN_BATCH); - if (numTxns > maxTxns) { - numTxns = maxTxns; - } + ResultSet rs = null; List insertPreparedStmts = null; TxnType txnType = rqst.isSetTxn_type() ? rqst.getTxn_type() : TxnType.DEFAULT; try { @@ -745,54 +620,51 @@ public OpenTxnsResponse openTxns(OpenTxnRequest rqst) throws MetaException { txnType = TxnType.REPL_CREATED; } - List txnIds = new ArrayList<>(numTxns); - /* - * The getGeneratedKeys are not supported in every dbms, after executing a multi line insert. - * But it is supported in every used dbms for single line insert, even if the metadata says otherwise. - * If the getGeneratedKeys are not supported first we insert a random batchId in the TXN_META_INFO field, - * then the keys are selected beck with that batchid. 
- */ - boolean genKeySupport = TxnDbUtil.supportsGetGeneratedKeys(dbProduct); - genKeySupport = genKeySupport || (numTxns == 1); - - String insertQuery = String.format(TXNS_INSERT_QRY, TxnDbUtil.getEpochFn(dbProduct), - TxnDbUtil.getEpochFn(dbProduct)); - LOG.debug("Going to execute insert <" + insertQuery + ">"); - try (PreparedStatement ps = dbConn.prepareStatement(insertQuery, new String[] {"TXN_ID"})) { - String state = genKeySupport ? Character.toString(TXN_OPEN) : Character.toString(TXN_TMP); - if (numTxns == 1) { - ps.setString(1, state); - ps.setString(2, rqst.getUser()); - ps.setString(3, rqst.getHostname()); - ps.setInt(4, txnType.getValue()); - txnIds.addAll(executeTxnInsertBatchAndExtractGeneratedKeys(dbConn, genKeySupport, ps, false)); - } else { - for (int i = 0; i < numTxns; ++i) { - ps.setString(1, state); - ps.setString(2, rqst.getUser()); - ps.setString(3, rqst.getHostname()); - ps.setInt(4, txnType.getValue()); - ps.addBatch(); - - if ((i + 1) % maxBatchSize == 0) { - txnIds.addAll(executeTxnInsertBatchAndExtractGeneratedKeys(dbConn, genKeySupport, ps, true)); - } - } - if (numTxns % maxBatchSize != 0) { - txnIds.addAll(executeTxnInsertBatchAndExtractGeneratedKeys(dbConn, genKeySupport, ps, true)); - } - } + String s = sqlGenerator.addForUpdateClause("SELECT \"NTXN_NEXT\" FROM \"NEXT_TXN_ID\""); + LOG.debug("Going to execute query <" + s + ">"); + rs = stmt.executeQuery(s); + if (!rs.next()) { + throw new MetaException("Transaction database not properly " + + "configured, can't find next transaction id."); } + long first = rs.getLong(1); + s = "UPDATE \"NEXT_TXN_ID\" SET \"NTXN_NEXT\" = " + (first + numTxns); + LOG.debug("Going to execute update <" + s + ">"); + stmt.executeUpdate(s); - assert txnIds.size() == numTxns; + List txnIds = new ArrayList<>(numTxns); + + List rows = new ArrayList<>(); + List params = new ArrayList<>(); + params.add(rqst.getUser()); + params.add(rqst.getHostname()); + List> paramsList = new ArrayList<>(numTxns); + + for 
(long i = first; i < first + numTxns; i++) { + txnIds.add(i); + rows.add(i + "," + quoteChar(TXN_OPEN) + "," + TxnDbUtil.getEpochFn(dbProduct) + "," + + TxnDbUtil.getEpochFn(dbProduct) + ",?,?," + txnType.getValue()); + paramsList.add(params); + } + insertPreparedStmts = sqlGenerator.createInsertValuesPreparedStmt(dbConn, + "\"TXNS\" (\"TXN_ID\", \"TXN_STATE\", \"TXN_STARTED\", \"TXN_LAST_HEARTBEAT\", " + + "\"TXN_USER\", \"TXN_HOST\", \"TXN_TYPE\")", + rows, paramsList); + for (PreparedStatement pst : insertPreparedStmts) { + pst.execute(); + } if (rqst.isSetReplPolicy()) { - List rowsRepl = new ArrayList<>(numTxns); - List params = new ArrayList<>(1); - List> paramsList = new ArrayList<>(numTxns); + List rowsRepl = new ArrayList<>(); + for (PreparedStatement pst : insertPreparedStmts) { + pst.close(); + } + insertPreparedStmts.clear(); + params.clear(); + paramsList.clear(); params.add(rqst.getReplPolicy()); for (int i = 0; i < numTxns; i++) { - rowsRepl.add("?," + rqst.getReplSrcTxnIds().get(i) + "," + txnIds.get(i)); + rowsRepl.add( "?," + rqst.getReplSrcTxnIds().get(i) + "," + txnIds.get(i)); paramsList.add(params); } @@ -815,125 +687,10 @@ public OpenTxnsResponse openTxns(OpenTxnRequest rqst) throws MetaException { pst.close(); } } + close(rs); } } - private List executeTxnInsertBatchAndExtractGeneratedKeys(Connection dbConn, boolean genKeySupport, - PreparedStatement ps, boolean batch) throws SQLException { - List txnIds = new ArrayList<>(); - if (batch) { - ps.executeBatch(); - } else { - // For slight performance advantage we do not use the executeBatch, when we only have one row - ps.execute(); - } - if (genKeySupport) { - try (ResultSet generatedKeys = ps.getGeneratedKeys()) { - while (generatedKeys.next()) { - txnIds.add(generatedKeys.getLong(1)); - } - } - } else { - try (PreparedStatement pstmt = - dbConn.prepareStatement("SELECT \"TXN_ID\" FROM \"TXNS\" WHERE \"TXN_STATE\" = ?")) { - pstmt.setString(1, Character.toString(TXN_TMP)); - try (ResultSet 
rs = pstmt.executeQuery()) { - while (rs.next()) { - txnIds.add(rs.getLong(1)); - } - } - } - try (PreparedStatement pstmt = dbConn - .prepareStatement("UPDATE \"TXNS\" SET \"TXN_STATE\" = ? WHERE \"TXN_STATE\" = ?")) { - pstmt.setString(1, Character.toString(TXN_OPEN)); - pstmt.setString(2, Character.toString(TXN_TMP)); - pstmt.executeUpdate(); - } - } - return txnIds; - } - - private void deleteInvalidOpenTransactions(Connection dbConn, List txnIds) throws MetaException { - if (txnIds.size() == 0) { - return; - } - try { - Statement stmt = null; - try { - stmt = dbConn.createStatement(); - - List queries = new ArrayList<>(); - StringBuilder prefix = new StringBuilder(); - StringBuilder suffix = new StringBuilder(); - prefix.append("DELETE FROM \"TXNS\" WHERE "); - TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, txnIds, "\"TXN_ID\"", false, false); - for (String s : queries) { - LOG.debug("Going to execute update <" + s + ">"); - stmt.executeUpdate(s); - } - } catch (SQLException e) { - LOG.debug("Going to rollback"); - rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "deleteInvalidOpenTransactions(" + txnIds + ")"); - throw new MetaException("Unable to select from transaction database " + StringUtils.stringifyException(e)); - } finally { - closeStmt(stmt); - } - } catch (RetryException ex) { - deleteInvalidOpenTransactions(dbConn, txnIds); - } - } - - @Override - public long getOpenTxnTimeOutMillis() { - return openTxnTimeOutMillis; - } - - @Override - public void setOpenTxnTimeOutMillis(long openTxnTimeOutMillis) { - this.openTxnTimeOutMillis = openTxnTimeOutMillis; - } - - protected long getOpenTxnTimeoutLowBoundaryTxnId(Connection dbConn) throws MetaException, SQLException { - long maxTxnId; - String s = - "SELECT MAX(\"TXN_ID\") FROM \"TXNS\" WHERE \"TXN_STARTED\" < (" + TxnDbUtil.getEpochFn(dbProduct) + " - " - + openTxnTimeOutMillis + ")"; - try (Statement stmt = dbConn.createStatement()) { - LOG.debug("Going to execute query <" + s + 
">"); - try (ResultSet maxTxnIdRs = stmt.executeQuery(s)) { - maxTxnIdRs.next(); - maxTxnId = maxTxnIdRs.getLong(1); - if (maxTxnIdRs.wasNull()) { - /* - * TXNS always contains at least one transaction, - * the row where txnid = (select max(txnid) where txn_started < epoch - TXN_OPENTXN_TIMEOUT) is never deleted - */ - throw new MetaException("Transaction tables not properly " + "initialized, null record found in MAX(TXN_ID)"); - } - } - } - return maxTxnId; - } - - private long getHighWaterMark(Statement stmt) throws SQLException, MetaException { - String s = "SELECT MAX(\"TXN_ID\") FROM \"TXNS\""; - LOG.debug("Going to execute query <" + s + ">"); - long maxOpenTxnId; - try (ResultSet maxOpenTxnIdRs = stmt.executeQuery(s)) { - maxOpenTxnIdRs.next(); - maxOpenTxnId = maxOpenTxnIdRs.getLong(1); - if (maxOpenTxnIdRs.wasNull()) { - /* - * TXNS always contains at least one transaction, - * the row where txnid = (select max(txnid) where txn_started < epoch - TXN_OPENTXN_TIMEOUT) is never deleted - */ - throw new MetaException("Transaction tables not properly " + "initialized, null record found in MAX(TXN_ID)"); - } - } - return maxOpenTxnId; - } - private List getTargetTxnIdList(String replPolicy, List sourceTxnIdList, Connection dbConn) throws SQLException { PreparedStatement pst = null; @@ -959,7 +716,7 @@ private long getHighWaterMark(Statement stmt) throws SQLException, MetaException } LOG.debug("targetTxnid for srcTxnId " + sourceTxnIdList.toString() + " is " + targetTxnIdList.toString()); return targetTxnIdList; - } catch (SQLException e) { + } catch (SQLException e) { LOG.warn("failed to get target txn ids " + e.getMessage()); throw e; } finally { @@ -1402,7 +1159,7 @@ public void commitTxn(CommitTxnRequest rqst) * even if it includes all of its columns * * First insert into write_set using a temporary commitID, which will be updated in a separate call, - * see: {@link #updateWSCommitIdAndCleanUpMetadata(Statement, long, TxnType, Long, long)}}. 
+ * see: {@link #updateCommitIdAndCleanUpMetadata(Statement, long, TxnType, Long, long)}}. * This should decrease the scope of the S4U lock on the next_txn_id table. */ Savepoint undoWriteSetForCurrentTxn = dbConn.setSavepoint(); @@ -1416,8 +1173,13 @@ public void commitTxn(CommitTxnRequest rqst) * at the same time and no new txns start until all 3 commit. * We could've incremented the sequence for commitId as well but it doesn't add anything functionally. */ - acquireTxnLock(stmt, false); - commitId = getHighWaterMark(stmt); + try (ResultSet commitIdRs = stmt.executeQuery(sqlGenerator.addForUpdateClause("SELECT \"NTXN_NEXT\" - 1 FROM \"NEXT_TXN_ID\""))) { + if (!commitIdRs.next()) { + throw new IllegalStateException("No rows found in NEXT_TXN_ID"); + } + commitId = commitIdRs.getLong(1); + } + /** * see if there are any overlapping txns that wrote the same element, i.e. have a conflict * Since entire commit operation is mutexed wrt other start/commit ops, @@ -1449,8 +1211,9 @@ public void commitTxn(CommitTxnRequest rqst) throw new TxnAbortedException(msg); } } - } else { - /* + } + else { + /** * current txn didn't update/delete anything (may have inserted), so just proceed with commit * * We only care about commit id for write txns, so for RO (when supported) txns we don't @@ -1460,7 +1223,6 @@ public void commitTxn(CommitTxnRequest rqst) * If RO < W, then there is no reads-from relationship. * In replication flow we don't expect any write write conflict as it should have been handled at source. 
*/ - assert true; } if (txnRecord.type != TxnType.READ_ONLY && !rqst.isSetReplPolicy()) { @@ -1491,7 +1253,7 @@ public void commitTxn(CommitTxnRequest rqst) } deleteReplTxnMapEntry(dbConn, sourceTxnId, rqst.getReplPolicy()); } - updateWSCommitIdAndCleanUpMetadata(stmt, txnid, txnRecord.type, commitId, tempCommitId); + updateCommitIdAndCleanUpMetadata(stmt, txnid, txnRecord.type, commitId, tempCommitId); if (rqst.isSetKeyValue()) { updateKeyValueAssociatedWithTxn(rqst, stmt); } @@ -1510,7 +1272,8 @@ public void commitTxn(CommitTxnRequest rqst) throw new MetaException("Unable to update transaction database " + StringUtils.stringifyException(e)); } finally { - close(null, stmt, dbConn); + closeStmt(stmt); + closeDbConn(dbConn); unlockInternal(); } } catch (RetryException e) { @@ -1575,8 +1338,7 @@ private void moveTxnComponentsToCompleted(Statement stmt, long txnid, char isUpd } } - private void updateWSCommitIdAndCleanUpMetadata(Statement stmt, long txnid, TxnType txnType, - Long commitId, long tempId) throws SQLException { + private void updateCommitIdAndCleanUpMetadata(Statement stmt, long txnid, TxnType txnType, Long commitId, long tempId) throws SQLException { List queryBatch = new ArrayList<>(5); // update write_set with real commitId if (commitId != null) { @@ -1588,8 +1350,7 @@ private void updateWSCommitIdAndCleanUpMetadata(Statement stmt, long txnid, TxnT queryBatch.add("DELETE FROM \"TXN_COMPONENTS\" WHERE \"TC_TXNID\" = " + txnid); } queryBatch.add("DELETE FROM \"HIVE_LOCKS\" WHERE \"HL_TXNID\" = " + txnid); - // DO NOT remove the transaction from the TXN table, the cleaner will remove it when appropriate - queryBatch.add("UPDATE \"TXNS\" SET \"TXN_STATE\" = " + quoteChar(TXN_COMMITTED) + " WHERE \"TXN_ID\" = " + txnid); + queryBatch.add("DELETE FROM \"TXNS\" WHERE \"TXN_ID\" = " + txnid); queryBatch.add("DELETE FROM \"MATERIALIZATION_REBUILD_LOCKS\" WHERE \"MRL_TXN_ID\" = " + txnid); // execute all in one batch @@ -1670,9 +1431,7 @@ public void 
replTableWriteIdState(ReplTblWriteIdStateRequest rqst) throws MetaEx if (numAbortedWrites > 0) { // Allocate/Map one txn per aborted writeId and abort the txn to mark writeid as aborted. - // We don't use the txnLock, all of these transactions will be aborted in this one rdbm transaction - // So they will not effect the commitTxn in any way - List txnIds = openTxns(dbConn, + List txnIds = openTxns(dbConn, stmt, new OpenTxnRequest(numAbortedWrites, rqst.getUser(), rqst.getHostName())); assert(numAbortedWrites == txnIds.size()); @@ -1732,7 +1491,7 @@ public void replTableWriteIdState(ReplTblWriteIdStateRequest rqst) throws MetaEx } closeStmt(pStmt); close(rs, stmt, dbConn); - if (handle != null) { + if(handle != null) { handle.releaseLocks(); } unlockInternal(); @@ -1774,9 +1533,7 @@ private ValidTxnList getValidTxnList(Connection dbConn, String fullTableName, Lo String[] names = TxnUtils.getDbTableName(fullTableName); assert names.length == 2; List params = Arrays.asList(names[0], names[1]); - String s = - "SELECT \"T2W_TXNID\" FROM \"TXN_TO_WRITE_ID\" WHERE \"T2W_DATABASE\" = ? AND " - + "\"T2W_TABLE\" = ? AND \"T2W_WRITEID\" = "+ writeId; + String s = "SELECT \"T2W_TXNID\" FROM \"TXN_TO_WRITE_ID\" WHERE \"T2W_DATABASE\" = ? AND \"T2W_TABLE\" = ? 
AND \"T2W_WRITEID\" = " + writeId; pst = sqlGenerator.prepareStmtWithParameters(dbConn, s, params); LOG.debug("Going to execute query <" + s.replaceAll("\\?", "{}") + ">", quoteString(names[0]), quoteString(names[1])); @@ -2244,37 +2001,46 @@ public void addWriteNotificationLog(AcidWriteEvent acidWriteEvent) @Override @RetrySemantics.SafeToRetry - public void performWriteSetGC() throws MetaException { + public void performWriteSetGC() { Connection dbConn = null; Statement stmt = null; ResultSet rs = null; try { dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED); stmt = dbConn.createStatement(); - - long minOpenTxn; + rs = stmt.executeQuery("SELECT \"NTXN_NEXT\" - 1 FROM \"NEXT_TXN_ID\""); + if(!rs.next()) { + throw new IllegalStateException("NEXT_TXN_ID is empty: DB is corrupted"); + } + long highestAllocatedTxnId = rs.getLong(1); + close(rs); rs = stmt.executeQuery("SELECT MIN(\"TXN_ID\") FROM \"TXNS\" WHERE \"TXN_STATE\"=" + quoteChar(TXN_OPEN)); - if (!rs.next()) { + if(!rs.next()) { throw new IllegalStateException("Scalar query returned no rows?!?!!"); } - minOpenTxn = rs.getLong(1); - if (rs.wasNull()) { - minOpenTxn = Long.MAX_VALUE; + long commitHighWaterMark;//all currently open txns (if any) have txnid >= than commitHighWaterMark + long lowestOpenTxnId = rs.getLong(1); + if(rs.wasNull()) { + //if here then there are no Open txns and highestAllocatedTxnId must be + //resolved (i.e. committed or aborted), either way + //there are no open txns with id <= highestAllocatedTxnId + //the +1 is there because "delete ..." below has < (which is correct for the case when + //there is an open txn + //Concurrency: even if new txn starts (or starts + commits) it is still true that + //there are no currently open txns that overlap with any committed txn with + //commitId <= commitHighWaterMark (as set on next line). So plain READ_COMMITTED is enough. 
+ commitHighWaterMark = highestAllocatedTxnId + 1; + } + else { + commitHighWaterMark = lowestOpenTxnId; } - long lowWaterMark = getOpenTxnTimeoutLowBoundaryTxnId(dbConn); - /** - * We try to find the highest transactionId below everything was committed or aborted. - * For that we look for the lowest open transaction in the TXNS and the TxnMinTimeout boundary, - * because it is guaranteed there won't be open transactions below that. - */ - long commitHighWaterMark = Long.min(minOpenTxn, lowWaterMark + 1); - LOG.debug("Perform WriteSet GC with minOpenTxn {}, lowWaterMark {}", minOpenTxn, lowWaterMark); int delCnt = stmt.executeUpdate("DELETE FROM \"WRITE_SET\" WHERE \"WS_COMMIT_ID\" < " + commitHighWaterMark); LOG.info("Deleted {} obsolete rows from WRITE_SET", delCnt); dbConn.commit(); } catch (SQLException ex) { LOG.warn("WriteSet GC failed due to " + getMessage(ex), ex); - } finally { + } + finally { close(rs, stmt, dbConn); } } @@ -2562,8 +2328,7 @@ public long cleanupMaterializationRebuildLocks(ValidTxnList validTxnList, long t public LockResponse lock(LockRequest rqst) throws NoSuchTxnException, TxnAbortedException, MetaException { ConnectionLockIdPair connAndLockId = enqueueLockWithRetry(rqst); try { - return checkLockWithRetry(connAndLockId.dbConn, connAndLockId.extLockId, rqst.getTxnid(), - rqst.isZeroWaitReadEnabled()); + return checkLockWithRetry(connAndLockId.dbConn, connAndLockId.extLockId, rqst.getTxnid()); } catch(NoSuchLockException e) { // This should never happen, as we just added the lock id @@ -2869,7 +2634,7 @@ private static String normalizeCase(String s) { return s == null ? 
null : s.toLowerCase(); } - private LockResponse checkLockWithRetry(Connection dbConn, long extLockId, long txnId, boolean zeroWaitReadEnabled) + private LockResponse checkLockWithRetry(Connection dbConn, long extLockId, long txnId) throws NoSuchLockException, TxnAbortedException, MetaException { try { try { @@ -2878,7 +2643,7 @@ private LockResponse checkLockWithRetry(Connection dbConn, long extLockId, long //should only get here if retrying this op dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED); } - return checkLock(dbConn, extLockId, txnId, zeroWaitReadEnabled); + return checkLock(dbConn, extLockId, txnId); } catch (SQLException e) { LOG.error("checkLock failed for extLockId={}/txnId={}. Exception msg: {}", extLockId, txnId, getMessage(e)); rollbackDBConn(dbConn); @@ -2893,7 +2658,7 @@ private LockResponse checkLockWithRetry(Connection dbConn, long extLockId, long catch(RetryException e) { LOG.debug("Going to retry checkLock for extLockId={}/txnId={} after catching RetryException with message: {}", extLockId, txnId, e.getMessage()); - return checkLockWithRetry(dbConn, extLockId, txnId, zeroWaitReadEnabled); + return checkLockWithRetry(dbConn, extLockId, txnId); } } /** @@ -2938,7 +2703,7 @@ public LockResponse checkLock(CheckLockRequest rqst) //todo: strictly speaking there is a bug here. heartbeat*() commits but both heartbeat and //checkLock() are in the same retry block, so if checkLock() throws, heartbeat is also retired //extra heartbeat is logically harmless, but ... - return checkLock(dbConn, extLockId, lockInfo.txnId, false); + return checkLock(dbConn, extLockId, lockInfo.txnId); } catch (SQLException e) { LOG.error("checkLock failed for request={}. 
Exception msg: {}", rqst, getMessage(e)); rollbackDBConn(dbConn); @@ -4388,7 +4153,7 @@ public int compare(LockInfo info1, LockInfo info2) { private void checkQFileTestHack(){ boolean hackOn = MetastoreConf.getBoolVar(conf, ConfVars.HIVE_IN_TEST) || - MetastoreConf.getBoolVar(conf, ConfVars.HIVE_IN_TEZ_TEST); + MetastoreConf.getBoolVar(conf, ConfVars.HIVE_IN_TEZ_TEST); if (hackOn) { LOG.info("Hacking in canned values for transaction manager"); // Set up the transaction/locking db in the derby metastore @@ -4485,7 +4250,7 @@ private static boolean isValidTxn(long txnId) { * checkLock() will in the worst case keep locks in Waiting state a little longer. */ @RetrySemantics.SafeToRetry("See @SafeToRetry") - private LockResponse checkLock(Connection dbConn, long extLockId, long txnId, boolean zeroWaitReadEnabled) + private LockResponse checkLock(Connection dbConn, long extLockId, long txnId) throws NoSuchLockException, TxnAbortedException, MetaException, SQLException { Statement stmt = null; ResultSet rs = null; @@ -4567,7 +4332,7 @@ private LockResponse checkLock(Connection dbConn, long extLockId, long txnId, bo } String queryStr = - " \"EX\".*, \"REQ\".\"HL_LOCK_INT_ID\" \"LOCK_INT_ID\", \"REQ\".\"HL_LOCK_TYPE\" \"LOCK_TYPE\" FROM (" + + " \"EX\".*, \"REQ\".\"HL_LOCK_INT_ID\" AS \"REQ_LOCK_INT_ID\" FROM (" + " SELECT \"HL_LOCK_EXT_ID\", \"HL_LOCK_INT_ID\", \"HL_TXNID\", \"HL_DB\", \"HL_TABLE\", \"HL_PARTITION\"," + " \"HL_LOCK_STATE\", \"HL_LOCK_TYPE\" FROM \"HIVE_LOCKS\"" + " WHERE \"HL_LOCK_EXT_ID\" < " + extLockId + ") \"EX\"" + @@ -4592,9 +4357,6 @@ private LockResponse checkLock(Connection dbConn, long extLockId, long txnId, bo is performed on that db (e.g. show tables, created table, etc). 
EXCLUSIVE on an object may mean it's being dropped or overwritten.*/ String[] whereStr = { - // shared-read - " \"REQ\".\"HL_LOCK_TYPE\"=" + LockTypeUtil.sharedRead() + " AND \"EX\".\"HL_LOCK_TYPE\"=" + - LockTypeUtil.exclusive() + " AND NOT (\"EX\".\"HL_TABLE\" IS NOT NULL AND \"REQ\".\"HL_TABLE\" IS NULL)", // exclusive " \"REQ\".\"HL_LOCK_TYPE\"=" + LockTypeUtil.exclusive() + " AND NOT (\"EX\".\"HL_TABLE\" IS NULL AND \"EX\".\"HL_LOCK_TYPE\"=" + @@ -4604,7 +4366,10 @@ is performed on that db (e.g. show tables, created table, etc). LockTypeUtil.exclWrite() + "," + LockTypeUtil.exclusive() + ")", // excl-write " \"REQ\".\"HL_LOCK_TYPE\"=" + LockTypeUtil.exclWrite() + " AND \"EX\".\"HL_LOCK_TYPE\"!=" + - LockTypeUtil.sharedRead() + LockTypeUtil.sharedRead(), + // shared-read + " \"REQ\".\"HL_LOCK_TYPE\"=" + LockTypeUtil.sharedRead() + " AND \"EX\".\"HL_LOCK_TYPE\"=" + + LockTypeUtil.exclusive() + " AND NOT (\"EX\".\"HL_TABLE\" IS NOT NULL AND \"REQ\".\"HL_TABLE\" IS NULL)" }; List subQuery = new ArrayList<>(); @@ -4620,40 +4385,21 @@ is performed on that db (e.g. show tables, created table, etc). if (rs.next()) { // We acquire all locks for a given query atomically; if 1 blocks, all remain in Waiting state. 
LockInfo blockedBy = new LockInfo(rs); - long intLockId = rs.getLong("LOCK_INT_ID"); - char lockChar = rs.getString("LOCK_TYPE").charAt(0); + long intLockId = rs.getLong("REQ_LOCK_INT_ID"); + LOG.debug("Failure to acquire lock({} intLockId:{} {}), blocked by ({}})", JavaUtils.lockIdToString(extLockId), + intLockId, JavaUtils.txnIdToString(txnId), blockedBy); - LOG.debug("Failure to acquire lock({} intLockId:{} {}), blocked by ({})", JavaUtils.lockIdToString(extLockId), - intLockId, JavaUtils.txnIdToString(txnId), blockedBy); - - if (zeroWaitReadEnabled && isValidTxn(txnId)) { - LockType lockType = LockTypeUtil.getLockTypeFromEncoding(lockChar) - .orElseThrow(() -> new MetaException("Unknown lock type: " + lockChar)); - - if (lockType == LockType.SHARED_READ) { - String cleanupQuery = "DELETE FROM \"HIVE_LOCKS\" WHERE \"HL_LOCK_EXT_ID\" = " + extLockId; - - LOG.debug("Going to execute query: <" + cleanupQuery + ">"); - stmt.executeUpdate(cleanupQuery); - dbConn.commit(); - - response.setErrorMessage(String.format( - "Unable to acquire read lock due to an exclusive lock {%s}", blockedBy)); - response.setState(LockState.NOT_ACQUIRED); - return response; - } - } String updateBlockedByQuery = "UPDATE \"HIVE_LOCKS\"" + - " SET \"HL_BLOCKEDBY_EXT_ID\" = " + blockedBy.extLockId + - ", \"HL_BLOCKEDBY_INT_ID\" = " + blockedBy.intLockId + - " WHERE \"HL_LOCK_EXT_ID\" = " + extLockId + " AND \"HL_LOCK_INT_ID\" = " + intLockId; + " SET \"HL_BLOCKEDBY_EXT_ID\" = " + blockedBy.extLockId + + ", \"HL_BLOCKEDBY_INT_ID\" = " + blockedBy.intLockId + + " WHERE \"HL_LOCK_EXT_ID\" = " + extLockId + " AND \"HL_LOCK_INT_ID\" = " + intLockId; LOG.debug("Going to execute query: <" + updateBlockedByQuery + ">"); int updCnt = stmt.executeUpdate(updateBlockedByQuery); if (updCnt != 1) { - LOG.error("Failure to update lock (extLockId={}, intLockId={}) with the blocking lock's IDs " + - "(extLockId={}, intLockId={})", extLockId, intLockId, blockedBy.extLockId, blockedBy.intLockId); + 
LOG.error("Failure to update blocked lock (extLockId={}, intLockId={}) with the blocking lock's IDs (extLockId={}, intLockId={})", + extLockId, intLockId, blockedBy.extLockId, blockedBy.intLockId); shouldNeverHappen(txnId, extLockId, intLockId); } dbConn.commit(); @@ -4759,7 +4505,7 @@ private void heartbeatTxn(Connection dbConn, long txnid) } /** - * Returns the state of the transaction if it's able to determine it. Some cases where it cannot: + * Returns the state of the transaction iff it's able to determine it. Some cases where it cannot: * 1. txnid was Aborted/Committed and then GC'd (compacted) * 2. txnid was committed but it didn't modify anything (nothing in COMPLETED_TXN_COMPONENTS) */ @@ -4784,9 +4530,6 @@ private TxnStatus findTxnState(long txnid, Statement stmt) throws SQLException, if (txnState == TXN_ABORTED) { return TxnStatus.ABORTED; } - if (txnState == TXN_COMMITTED) { - return TxnStatus.COMMITTED; - } assert txnState == TXN_OPEN : "we found it in TXNS but it's not ABORTED, so must be OPEN"; } return TxnStatus.OPEN; @@ -5166,15 +4909,11 @@ private static synchronized DataSource setupJdbcConnectionPool(Configuration con static boolean isRetryable(Configuration conf, Exception ex) { if(ex instanceof SQLException) { SQLException sqlException = (SQLException)ex; - if (MANUAL_RETRY.equalsIgnoreCase(sqlException.getSQLState())) { - // Manual retry exception was thrown - return true; - } - if ("08S01".equalsIgnoreCase(sqlException.getSQLState())) { + if("08S01".equalsIgnoreCase(sqlException.getSQLState())) { //in MSSQL this means Communication Link Failure return true; } - if ("ORA-08176".equalsIgnoreCase(sqlException.getSQLState()) || + if("ORA-08176".equalsIgnoreCase(sqlException.getSQLState()) || sqlException.getMessage().contains("consistent read failure; rollback data not available")) { return true; } @@ -5353,25 +5092,10 @@ public LockHandle acquireLock(String key) throws MetaException { return acquireLock(key); } } - - @Override public void 
acquireLock(String key, LockHandle handle) { //the idea is that this will use LockHandle.dbConn throw new NotImplementedException("acquireLock(String, LockHandle) is not implemented"); } - - /** - * Acquire the global txn lock, used to mutex the openTxn and commitTxn. - * @param stmt Statement to execute the lock on - * @param shared either SHARED_READ or EXCLUSIVE - * @throws SQLException - */ - private void acquireTxnLock(Statement stmt, boolean shared) throws SQLException, MetaException { - String sqlStmt = sqlGenerator.createTxnLockStatement(shared); - stmt.execute(sqlStmt); - LOG.debug("TXN lock locked by {} in mode {}", quoteString(TxnHandler.hostname), shared); - } - private static final class LockHandleImpl implements LockHandle { private final Connection dbConn; private final Statement stmt; @@ -5408,7 +5132,6 @@ public void releaseLocks() { } } - private static class NoPoolConnectionPool implements DataSource { // Note that this depends on the fact that no-one in this class calls anything but // getConnection. If you want to use any of the Logger or wrap calls you'll have to diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java index e8ac71ae55..87130a519d 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java @@ -44,7 +44,7 @@ /** * Prefix for key when committing with a key/value. 
*/ - String TXN_KEY_START = "_meta"; + public static final String TXN_KEY_START = "_meta"; enum MUTEX_KEY { Initiator, Cleaner, HouseKeeper, CompactionHistory, CheckLock, @@ -380,19 +380,18 @@ void onRename(String oldCatName, String oldDbName, String oldTabName, String old /** * Clean up entries from TXN_TO_WRITE_ID table less than min_uncommited_txnid as found by - * min(max(TXNS.txn_id), min(WRITE_SET.WS_COMMIT_ID), min(Aborted TXNS.txn_id)). + * min(NEXT_TXN_ID.ntxn_next, min(MIN_HISTORY_LEVEL.mhl_min_open_txnid), min(Aborted TXNS.txn_id)). */ @RetrySemantics.SafeToRetry void cleanTxnToWriteIdTable() throws MetaException; /** - * Clean up aborted or committed transactions from txns that have no components in txn_components. The reason such - * txns exist can be that no work was done in this txn (e.g. Streaming opened TransactionBatch and - * abandoned it w/o doing any work) or due to {@link #markCleaned(CompactionInfo)} being called, - * or the delete from the txns was delayed because of TXN_OPENTXN_TIMEOUT window. + * Clean up aborted transactions from txns that have no components in txn_components. The reson such + * txns exist can be that now work was done in this txn (e.g. Streaming opened TransactionBatch and + * abandoned it w/o doing any work) or due to {@link #markCleaned(CompactionInfo)} being called. */ @RetrySemantics.SafeToRetry - void cleanEmptyAbortedAndCommittedTxns() throws MetaException; + void cleanEmptyAbortedTxns() throws MetaException; /** * This will take all entries assigned to workers @@ -443,7 +442,7 @@ void onRename(String oldCatName, String oldDbName, String oldTabName, String old * transaction metadata once it becomes unnecessary. 
*/ @RetrySemantics.SafeToRetry - void performWriteSetGC() throws MetaException; + void performWriteSetGC(); /** * Determine if there are enough consecutive failures compacting a table or partition that no @@ -462,12 +461,6 @@ void onRename(String oldCatName, String oldDbName, String oldTabName, String old @VisibleForTesting long setTimeout(long milliseconds); - @VisibleForTesting - long getOpenTxnTimeOutMillis(); - - @VisibleForTesting - void setOpenTxnTimeOutMillis(long openTxnTimeOutMillis); - @RetrySemantics.Idempotent MutexAPI getMutexAPI(); @@ -480,7 +473,7 @@ void onRename(String oldCatName, String oldDbName, String oldTabName, String old */ interface MutexAPI { /** - * The {@code key} is name of the lock. Will acquire an exclusive lock or block. It returns + * The {@code key} is name of the lock. Will acquire and exclusive lock or block. It retuns * a handle which must be used to release the lock. Each invocation returns a new handle. */ LockHandle acquireLock(String key) throws MetaException; diff --git a/standalone-metastore/metastore-server/src/main/sql/derby/hive-schema-4.0.0.derby.sql b/standalone-metastore/metastore-server/src/main/sql/derby/hive-schema-4.0.0.derby.sql index 727abce951..366b6f02c1 100644 --- a/standalone-metastore/metastore-server/src/main/sql/derby/hive-schema-4.0.0.derby.sql +++ b/standalone-metastore/metastore-server/src/main/sql/derby/hive-schema-4.0.0.derby.sql @@ -234,8 +234,9 @@ CREATE TABLE "APP"."CTLGS" ( "CREATE_TIME" INTEGER); -- Insert a default value. The location is TBD. 
Hive will fix this when it starts -INSERT INTO "APP"."CTLGS" ("CTLG_ID", "NAME", "DESC", "LOCATION_URI", "CREATE_TIME") -VALUES (1, 'hive', 'Default catalog for Hive', 'TBD', NULL); +INSERT INTO "APP"."CTLGS" + ("CTLG_ID", "NAME", "DESC", "LOCATION_URI", "CREATE_TIME") + VALUES (1, 'hive', 'Default catalog for Hive', 'TBD', NULL); -- ---------------------------------------------- -- DML Statements @@ -523,7 +524,7 @@ ALTER TABLE "APP"."SDS" ADD CONSTRAINT "SQL110318025505550" CHECK (IS_COMPRESSED -- Transaction and Lock Tables -- ---------------------------- CREATE TABLE TXNS ( - TXN_ID bigint PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + TXN_ID bigint PRIMARY KEY, TXN_STATE char(1) NOT NULL, TXN_STARTED bigint NOT NULL, TXN_LAST_HEARTBEAT bigint NOT NULL, @@ -535,9 +536,6 @@ CREATE TABLE TXNS ( TXN_TYPE integer ); -INSERT INTO TXNS (TXN_ID, TXN_STATE, TXN_STARTED, TXN_LAST_HEARTBEAT, TXN_USER, TXN_HOST) - VALUES(0, 'c', 0, 0, '', ''); - CREATE TABLE TXN_COMPONENTS ( TC_TXNID bigint NOT NULL REFERENCES TXNS (TXN_ID), TC_DATABASE varchar(128) NOT NULL, @@ -561,10 +559,10 @@ CREATE TABLE COMPLETED_TXN_COMPONENTS ( CREATE INDEX COMPLETED_TXN_COMPONENTS_IDX ON COMPLETED_TXN_COMPONENTS (CTC_DATABASE, CTC_TABLE, CTC_PARTITION); -CREATE TABLE TXN_LOCK_TBL ( - TXN_LOCK bigint NOT NULL +CREATE TABLE NEXT_TXN_ID ( + NTXN_NEXT bigint NOT NULL ); -INSERT INTO TXN_LOCK_TBL VALUES(1); +INSERT INTO NEXT_TXN_ID VALUES(1); CREATE TABLE HIVE_LOCKS ( HL_LOCK_EXT_ID bigint NOT NULL, diff --git a/standalone-metastore/metastore-server/src/main/sql/derby/upgrade-3.2.0-to-4.0.0.derby.sql b/standalone-metastore/metastore-server/src/main/sql/derby/upgrade-3.2.0-to-4.0.0.derby.sql index db2f43df1c..8a3cd56658 100644 --- a/standalone-metastore/metastore-server/src/main/sql/derby/upgrade-3.2.0-to-4.0.0.derby.sql +++ b/standalone-metastore/metastore-server/src/main/sql/derby/upgrade-3.2.0-to-4.0.0.derby.sql @@ -68,19 +68,5 @@ ALTER TABLE "APP"."DBS" ADD COLUMN "DB_MANAGED_LOCATION_URI" 
VARCHAR(4000); ALTER TABLE COMPACTION_QUEUE ADD CQ_NEXT_TXN_ID bigint; DROP TABLE MIN_HISTORY_LEVEL; --- HIVE-23048 -INSERT INTO TXNS (TXN_ID, TXN_STATE, TXN_STARTED, TXN_LAST_HEARTBEAT, TXN_USER, TXN_HOST) - SELECT COALESCE(MAX(CTC_TXNID),0), 'c', 0, 0, '_', '_' FROM COMPLETED_TXN_COMPONENTS; -INSERT INTO TXNS (TXN_ID, TXN_STATE, TXN_STARTED, TXN_LAST_HEARTBEAT, TXN_USER, TXN_HOST) - VALUES (1000000000, 'c', 0, 0, '_', '_'); -ALTER TABLE TXNS ADD COLUMN TXN_ID_TMP bigint; -UPDATE TXNS SET TXN_ID_TMP=TXN_ID; -ALTER TABLE TXNS DROP COLUMN TXN_ID; -ALTER TABLE TXNS ADD COLUMN TXN_ID BIGINT PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY (START WITH 1000000001, INCREMENT BY 1); -UPDATE TXNS SET TXN_ID=TXN_ID_TMP; -ALTER TABLE TXNS DROP COLUMN TXN_ID_TMP; - -RENAME TABLE NEXT_TXN_ID TO TXN_LOCK_TBL; - -- This needs to be the last thing done. Insert any changes above this line. UPDATE "APP".VERSION SET SCHEMA_VERSION='4.0.0', VERSION_COMMENT='Hive release version 4.0.0' where VER_ID=1; diff --git a/standalone-metastore/metastore-server/src/main/sql/mssql/hive-schema-4.0.0.mssql.sql b/standalone-metastore/metastore-server/src/main/sql/mssql/hive-schema-4.0.0.mssql.sql index 6906bdf6b9..2e0177723d 100644 --- a/standalone-metastore/metastore-server/src/main/sql/mssql/hive-schema-4.0.0.mssql.sql +++ b/standalone-metastore/metastore-server/src/main/sql/mssql/hive-schema-4.0.0.mssql.sql @@ -1096,31 +1096,28 @@ CREATE TABLE NEXT_LOCK_ID( INSERT INTO NEXT_LOCK_ID VALUES(1); -CREATE TABLE TXN_LOCK_TBL( - TXN_LOCK bigint NOT NULL +CREATE TABLE NEXT_TXN_ID( + NTXN_NEXT bigint NOT NULL ); -INSERT INTO TXN_LOCK_TBL VALUES(1); +INSERT INTO NEXT_TXN_ID VALUES(1); CREATE TABLE TXNS( - TXN_ID bigint NOT NULL IDENTITY(1,1), - TXN_STATE char(1) NOT NULL, - TXN_STARTED bigint NOT NULL, - TXN_LAST_HEARTBEAT bigint NOT NULL, - TXN_USER nvarchar(128) NOT NULL, - TXN_HOST nvarchar(128) NOT NULL, + TXN_ID bigint NOT NULL, + TXN_STATE char(1) NOT NULL, + TXN_STARTED bigint NOT NULL, + 
TXN_LAST_HEARTBEAT bigint NOT NULL, + TXN_USER nvarchar(128) NOT NULL, + TXN_HOST nvarchar(128) NOT NULL, TXN_AGENT_INFO nvarchar(128) NULL, TXN_META_INFO nvarchar(128) NULL, TXN_HEARTBEAT_COUNT int NULL, TXN_TYPE int NULL, PRIMARY KEY CLUSTERED ( - TXN_ID ASC + TXN_ID ASC ) ); -SET IDENTITY_INSERT TXNS ON; -INSERT INTO TXNS (TXN_ID, TXN_STATE, TXN_STARTED, TXN_LAST_HEARTBEAT, TXN_USER, TXN_HOST) - VALUES(0, 'c', 0, 0, '', ''); CREATE TABLE TXN_COMPONENTS( TC_TXNID bigint NOT NULL, diff --git a/standalone-metastore/metastore-server/src/main/sql/mssql/upgrade-3.2.0-to-4.0.0.mssql.sql b/standalone-metastore/metastore-server/src/main/sql/mssql/upgrade-3.2.0-to-4.0.0.mssql.sql index 7583c5cf66..9f3951575b 100644 --- a/standalone-metastore/metastore-server/src/main/sql/mssql/upgrade-3.2.0-to-4.0.0.mssql.sql +++ b/standalone-metastore/metastore-server/src/main/sql/mssql/upgrade-3.2.0-to-4.0.0.mssql.sql @@ -68,56 +68,9 @@ INSERT INTO NOTIFICATION_SEQUENCE (NNI_ID, NEXT_EVENT_ID) SELECT 1,1 WHERE NOT E ALTER TABLE DBS ADD DB_MANAGED_LOCATION_URI nvarchar(4000); -- HIVE-23107 -ALTER TABLE COMPACTION_QUEUE ADD CQ_NEXT_TXN_ID bigint NOT NULL; +ALTER TABLE COMPACTION_QUEUE bigint CQ_NEXT_TXN_ID NOT NULL; DROP TABLE MIN_HISTORY_LEVEL; --- HIVE-23048 -INSERT INTO TXNS (TXN_ID, TXN_STATE, TXN_STARTED, TXN_LAST_HEARTBEAT, TXN_USER, TXN_HOST) - SELECT COALESCE(MAX(CTC_TXNID),0), 'c', 0, 0, '', '' FROM COMPLETED_TXN_COMPONENTS; - -CREATE TABLE TMP_TXNS( - TXN_ID bigint NOT NULL IDENTITY(1,1), - TXN_STATE char(1) NOT NULL, - TXN_STARTED bigint NOT NULL, - TXN_LAST_HEARTBEAT bigint NOT NULL, - TXN_USER nvarchar(128) NOT NULL, - TXN_HOST nvarchar(128) NOT NULL, - TXN_AGENT_INFO nvarchar(128) NULL, - TXN_META_INFO nvarchar(128) NULL, - TXN_HEARTBEAT_COUNT int NULL, - TXN_TYPE int NULL, -PRIMARY KEY CLUSTERED -( - TXN_ID ASC -) -); - -SET IDENTITY_INSERT TMP_TXNS ON; -INSERT INTO TMP_TXNS (TXN_ID,TXN_STATE, TXN_STARTED, TXN_LAST_HEARTBEAT, TXN_USER, TXN_HOST, TXN_AGENT_INFO, 
TXN_META_INFO, TXN_HEARTBEAT_COUNT, TXN_TYPE) -SELECT TXN_ID, TXN_STATE, TXN_STARTED, TXN_LAST_HEARTBEAT, TXN_USER, TXN_HOST, TXN_AGENT_INFO, TXN_META_INFO, TXN_HEARTBEAT_COUNT, TXN_TYPE FROM TXNS TABLOCKX; - -SET IDENTITY_INSERT TMP_TXNS OFF; - -CREATE TABLE TMP_TXN_COMPONENTS( - TC_TXNID bigint NOT NULL, - TC_DATABASE nvarchar(128) NOT NULL, - TC_TABLE nvarchar(128) NULL, - TC_PARTITION nvarchar(767) NULL, - TC_OPERATION_TYPE char(1) NOT NULL, - TC_WRITEID bigint -); -INSERT INTO TMP_TXN_COMPONENTS SELECT * FROM TXN_COMPONENTS; - -DROP TABLE TXN_COMPONENTS; -DROP TABLE TXNS; - -Exec sp_rename 'TMP_TXNS', 'TXNS'; -Exec sp_rename 'TMP_TXN_COMPONENTS', 'TXN_COMPONENTS'; -Exec sp_rename 'NEXT_TXN_ID', 'TXN_LOCK_TBL'; - -ALTER TABLE TXN_COMPONENTS WITH CHECK ADD FOREIGN KEY(TC_TXNID) REFERENCES TXNS (TXN_ID); -CREATE INDEX TC_TXNID_INDEX ON TXN_COMPONENTS (TC_TXNID); - -- These lines need to be last. Insert any changes above. UPDATE VERSION SET SCHEMA_VERSION='4.0.0', VERSION_COMMENT='Hive release version 4.0.0' where VER_ID=1; SELECT 'Finished upgrading MetaStore schema from 3.2.0 to 4.0.0' AS MESSAGE; diff --git a/standalone-metastore/metastore-server/src/main/sql/mysql/hive-schema-4.0.0.mysql.sql b/standalone-metastore/metastore-server/src/main/sql/mysql/hive-schema-4.0.0.mysql.sql index b7f423c326..0512a45cad 100644 --- a/standalone-metastore/metastore-server/src/main/sql/mysql/hive-schema-4.0.0.mysql.sql +++ b/standalone-metastore/metastore-server/src/main/sql/mysql/hive-schema-4.0.0.mysql.sql @@ -989,7 +989,7 @@ CREATE TABLE IF NOT EXISTS WM_MAPPING -- Transaction and Lock Tables -- ---------------------------- CREATE TABLE TXNS ( - TXN_ID bigint PRIMARY KEY AUTO_INCREMENT, + TXN_ID bigint PRIMARY KEY, TXN_STATE char(1) NOT NULL, TXN_STARTED bigint NOT NULL, TXN_LAST_HEARTBEAT bigint NOT NULL, @@ -1001,9 +1001,6 @@ CREATE TABLE TXNS ( TXN_TYPE int ) ENGINE=InnoDB DEFAULT CHARSET=latin1; -INSERT INTO TXNS (TXN_ID, TXN_STATE, TXN_STARTED, TXN_LAST_HEARTBEAT, 
TXN_USER, TXN_HOST) - VALUES(0, 'c', 0, 0, '', ''); - CREATE TABLE TXN_COMPONENTS ( TC_TXNID bigint NOT NULL, TC_DATABASE varchar(128) NOT NULL, @@ -1028,10 +1025,10 @@ CREATE TABLE COMPLETED_TXN_COMPONENTS ( CREATE INDEX COMPLETED_TXN_COMPONENTS_IDX ON COMPLETED_TXN_COMPONENTS (CTC_DATABASE, CTC_TABLE, CTC_PARTITION) USING BTREE; -CREATE TABLE TXN_LOCK_TBL ( - TXN_LOCK bigint NOT NULL +CREATE TABLE NEXT_TXN_ID ( + NTXN_NEXT bigint NOT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1; -INSERT INTO TXN_LOCK_TBL VALUES(1); +INSERT INTO NEXT_TXN_ID VALUES(1); CREATE TABLE HIVE_LOCKS ( HL_LOCK_EXT_ID bigint NOT NULL, diff --git a/standalone-metastore/metastore-server/src/main/sql/mysql/upgrade-3.2.0-to-4.0.0.mysql.sql b/standalone-metastore/metastore-server/src/main/sql/mysql/upgrade-3.2.0-to-4.0.0.mysql.sql index 78a841a499..4b82e36ab4 100644 --- a/standalone-metastore/metastore-server/src/main/sql/mysql/upgrade-3.2.0-to-4.0.0.mysql.sql +++ b/standalone-metastore/metastore-server/src/main/sql/mysql/upgrade-3.2.0-to-4.0.0.mysql.sql @@ -72,23 +72,6 @@ ALTER TABLE DBS ADD COLUMN DB_MANAGED_LOCATION_URI VARCHAR(4000) CHARACTER SET l ALTER TABLE COMPACTION_QUEUE ADD CQ_NEXT_TXN_ID bigint; DROP TABLE MIN_HISTORY_LEVEL; --- HIVE-23048 -INSERT INTO TXNS (TXN_ID, TXN_STATE, TXN_STARTED, TXN_LAST_HEARTBEAT, TXN_USER, TXN_HOST) - SELECT IFNULL(MAX(CTC_TXNID),0), 'c', 0, 0, '', '' FROM COMPLETED_TXN_COMPONENTS; -ALTER TABLE TXNS ADD COLUMN TXN_ID_TMP BIGINT; -UPDATE TXNS SET TXN_ID_TMP=TXN_ID; -SET FOREIGN_KEY_CHECKS = 0; -ALTER TABLE TXNS MODIFY TXN_ID BIGINT AUTO_INCREMENT; -SET FOREIGN_KEY_CHECKS = 1; -UPDATE TXNS SET TXN_ID=TXN_ID_TMP; -ALTER TABLE TXNS DROP COLUMN TXN_ID_TMP; -SELECT MAX(TXN_ID) + 1 INTO @AutoInc FROM TXNS; -SET @s:=CONCAT('ALTER TABLE TXNS AUTO_INCREMENT=', @AutoInc); -PREPARE stmt FROM @s; -EXECUTE stmt; -DEALLOCATE PREPARE stmt; -RENAME TABLE NEXT_TXN_ID TO TXN_LOCK_TBL; - -- These lines need to be last. Insert any changes above. 
UPDATE VERSION SET SCHEMA_VERSION='4.0.0', VERSION_COMMENT='Hive release version 4.0.0' where VER_ID=1; SELECT 'Finished upgrading MetaStore schema from 3.2.0 to 4.0.0' AS MESSAGE; diff --git a/standalone-metastore/metastore-server/src/main/sql/oracle/hive-schema-4.0.0.oracle.sql b/standalone-metastore/metastore-server/src/main/sql/oracle/hive-schema-4.0.0.oracle.sql index 0082dcd06c..db398e5f66 100644 --- a/standalone-metastore/metastore-server/src/main/sql/oracle/hive-schema-4.0.0.oracle.sql +++ b/standalone-metastore/metastore-server/src/main/sql/oracle/hive-schema-4.0.0.oracle.sql @@ -972,7 +972,7 @@ ALTER TABLE MV_TABLES_USED ADD CONSTRAINT MV_TABLES_USED_FK2 FOREIGN KEY (TBL_ID -- Transaction and lock tables ------------------------------ CREATE TABLE TXNS ( - TXN_ID NUMBER(19) GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY, + TXN_ID NUMBER(19) PRIMARY KEY, TXN_STATE char(1) NOT NULL, TXN_STARTED NUMBER(19) NOT NULL, TXN_LAST_HEARTBEAT NUMBER(19) NOT NULL, @@ -984,9 +984,6 @@ CREATE TABLE TXNS ( TXN_TYPE number(10) ) ROWDEPENDENCIES; -INSERT INTO TXNS (TXN_ID, TXN_STATE, TXN_STARTED, TXN_LAST_HEARTBEAT, TXN_USER, TXN_HOST) - VALUES(0, 'c', 0, 0, '_', '_'); - CREATE TABLE TXN_COMPONENTS ( TC_TXNID NUMBER(19) NOT NULL REFERENCES TXNS (TXN_ID), TC_DATABASE VARCHAR2(128) NOT NULL, @@ -1010,10 +1007,10 @@ CREATE TABLE COMPLETED_TXN_COMPONENTS ( CREATE INDEX COMPLETED_TXN_COMPONENTS_INDEX ON COMPLETED_TXN_COMPONENTS (CTC_DATABASE, CTC_TABLE, CTC_PARTITION); -CREATE TABLE TXN_LOCK_TBL ( - TXN_LOCK NUMBER(19) NOT NULL +CREATE TABLE NEXT_TXN_ID ( + NTXN_NEXT NUMBER(19) NOT NULL ); -INSERT INTO TXN_LOCK_TBL VALUES(1); +INSERT INTO NEXT_TXN_ID VALUES(1); CREATE TABLE HIVE_LOCKS ( HL_LOCK_EXT_ID NUMBER(19) NOT NULL, diff --git a/standalone-metastore/metastore-server/src/main/sql/oracle/upgrade-3.2.0-to-4.0.0.oracle.sql b/standalone-metastore/metastore-server/src/main/sql/oracle/upgrade-3.2.0-to-4.0.0.oracle.sql index 51f52a44b9..1be83fc4a9 100644 --- 
a/standalone-metastore/metastore-server/src/main/sql/oracle/upgrade-3.2.0-to-4.0.0.oracle.sql +++ b/standalone-metastore/metastore-server/src/main/sql/oracle/upgrade-3.2.0-to-4.0.0.oracle.sql @@ -72,21 +72,6 @@ ALTER TABLE DBS ADD DB_MANAGED_LOCATION_URI VARCHAR2(4000) NULL; ALTER TABLE COMPACTION_QUEUE ADD CQ_NEXT_TXN_ID NUMBER(19); DROP TABLE MIN_HISTORY_LEVEL; --- HIVE-23048 -INSERT INTO TXNS (TXN_ID, TXN_STATE, TXN_STARTED, TXN_LAST_HEARTBEAT, TXN_USER, TXN_HOST) - SELECT COALESCE(MAX(CTC_TXNID),0), 'c', 0, 0, '_', '_' FROM COMPLETED_TXN_COMPONENTS; -INSERT INTO TXNS (TXN_ID, TXN_STATE, TXN_STARTED, TXN_LAST_HEARTBEAT, TXN_USER, TXN_HOST) - VALUES (1000000000, 'c', 0, 0, '_', '_'); --- DECLARE max_txn NUMBER; --- BEGIN --- SELECT MAX(TXN_ID) + 1 INTO max_txn FROM TXNS; --- EXECUTE IMMEDIATE 'CREATE SEQUENCE TXNS_TXN_ID_SEQ INCREMENT BY 1 START WITH ' || max_txn || ' CACHE 20'; --- END; -CREATE SEQUENCE TXNS_TXN_ID_SEQ INCREMENT BY 1 START WITH 1000000001 CACHE 20; -ALTER TABLE TXNS MODIFY TXN_ID default TXNS_TXN_ID_SEQ.nextval; - -RENAME TABLE NEXT_TXN_ID TO TXN_LOCK_TBL; - -- These lines need to be last. Insert any changes above. 
UPDATE VERSION SET SCHEMA_VERSION='4.0.0', VERSION_COMMENT='Hive release version 4.0.0' where VER_ID=1; SELECT 'Finished upgrading MetaStore schema from 3.2.0 to 4.0.0' AS Status from dual; diff --git a/standalone-metastore/metastore-server/src/main/sql/postgres/hive-schema-4.0.0.postgres.sql b/standalone-metastore/metastore-server/src/main/sql/postgres/hive-schema-4.0.0.postgres.sql index 717e707407..e6e30160af 100644 --- a/standalone-metastore/metastore-server/src/main/sql/postgres/hive-schema-4.0.0.postgres.sql +++ b/standalone-metastore/metastore-server/src/main/sql/postgres/hive-schema-4.0.0.postgres.sql @@ -1658,7 +1658,7 @@ GRANT ALL ON SCHEMA public TO PUBLIC; -- Transaction and lock tables ------------------------------ CREATE TABLE "TXNS" ( - "TXN_ID" bigserial PRIMARY KEY, + "TXN_ID" bigint PRIMARY KEY, "TXN_STATE" char(1) NOT NULL, "TXN_STARTED" bigint NOT NULL, "TXN_LAST_HEARTBEAT" bigint NOT NULL, @@ -1669,8 +1669,6 @@ CREATE TABLE "TXNS" ( "TXN_HEARTBEAT_COUNT" integer, "TXN_TYPE" integer ); -INSERT INTO "TXNS" ("TXN_ID", "TXN_STATE", "TXN_STARTED", "TXN_LAST_HEARTBEAT", "TXN_USER", "TXN_HOST") - VALUES(0, 'c', 0, 0, '', ''); CREATE TABLE "TXN_COMPONENTS" ( "TC_TXNID" bigint NOT NULL REFERENCES "TXNS" ("TXN_ID"), @@ -1695,10 +1693,10 @@ CREATE TABLE "COMPLETED_TXN_COMPONENTS" ( CREATE INDEX COMPLETED_TXN_COMPONENTS_INDEX ON "COMPLETED_TXN_COMPONENTS" USING btree ("CTC_DATABASE", "CTC_TABLE", "CTC_PARTITION"); -CREATE TABLE "TXN_LOCK_TBL" ( - "TXN_LOCK" bigint NOT NULL +CREATE TABLE "NEXT_TXN_ID" ( + "NTXN_NEXT" bigint NOT NULL ); -INSERT INTO "TXN_LOCK_TBL" VALUES(1); +INSERT INTO "NEXT_TXN_ID" VALUES(1); CREATE TABLE "HIVE_LOCKS" ( "HL_LOCK_EXT_ID" bigint NOT NULL, diff --git a/standalone-metastore/metastore-server/src/main/sql/postgres/upgrade-3.2.0-to-4.0.0.postgres.sql b/standalone-metastore/metastore-server/src/main/sql/postgres/upgrade-3.2.0-to-4.0.0.postgres.sql index 63a5c4422c..b90cecb173 100644 --- 
a/standalone-metastore/metastore-server/src/main/sql/postgres/upgrade-3.2.0-to-4.0.0.postgres.sql +++ b/standalone-metastore/metastore-server/src/main/sql/postgres/upgrade-3.2.0-to-4.0.0.postgres.sql @@ -203,15 +203,6 @@ ALTER TABLE "DBS" ADD "DB_MANAGED_LOCATION_URI" character varying(4000); ALTER TABLE "COMPACTION_QUEUE" ADD "CQ_NEXT_TXN_ID" bigint; DROP TABLE "MIN_HISTORY_LEVEL"; --- HIVE-23048 -INSERT INTO "TXNS" ("TXN_ID", "TXN_STATE", "TXN_STARTED", "TXN_LAST_HEARTBEAT", "TXN_USER", "TXN_HOST") - SELECT COALESCE(MAX("CTC_TXNID"),0), 'c', 0, 0, '', '' FROM "COMPLETED_TXN_COMPONENTS"; -CREATE SEQUENCE "TXNS_TXN_ID_SEQ" MINVALUE 0 OWNED BY "TXNS"."TXN_ID"; -select setval('"TXNS_TXN_ID_SEQ"', (SELECT MAX("TXN_ID") FROM "TXNS")); -ALTER TABLE "TXNS" ALTER "TXN_ID" SET DEFAULT nextval('"TXNS_TXN_ID_SEQ"'); - -ALTER TABLE "NEXT_TXN_ID" RENAME TO "TXN_LOCK_TBL"; - -- These lines need to be last. Insert any changes above. UPDATE "VERSION" SET "SCHEMA_VERSION"='4.0.0', "VERSION_COMMENT"='Hive release version 4.0.0' where "VER_ID"=1; SELECT 'Finished upgrading MetaStore schema from 3.2.0 to 4.0.0'; diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/DbInstallBase.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/DbInstallBase.java index 2ebba393bc..c1a1629548 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/DbInstallBase.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/DbInstallBase.java @@ -36,7 +36,6 @@ public void upgrade() throws HiveMetaException { Assert.assertEquals(0, getRule().createUser()); Assert.assertEquals(0, getRule().installAVersion(FIRST_VERSION)); Assert.assertEquals(0, getRule().upgradeToLatest()); - Assert.assertEquals(0, getRule().validateSchema()); } protected abstract DatabaseRule getRule(); diff --git 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/DatabaseRule.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/DatabaseRule.java index e06011f6a5..a6d22d19ef 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/DatabaseRule.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/DatabaseRule.java @@ -68,11 +68,11 @@ public DatabaseRule() { public DatabaseRule setVerbose(boolean verbose) { this.verbose = verbose; return this; - } + }; public String getDb() { return HIVE_DB; - } + }; /** * URL to use when connecting as root rather than Hive @@ -147,7 +147,7 @@ public void after() { // stopAndRmDockerContainer protected String getDockerContainerName(){ return String.format("metastore-test-%s-install", getDbType()); - } + }; private ProcessResults runCmd(String[] cmd, long secondsToWait) throws IOException, InterruptedException { @@ -275,7 +275,7 @@ public int upgradeToLatest() { "-dbType", getDbType(), "-userName", - getHiveUser(), + HIVE_USER, "-passWord", getHivePassword(), "-url", @@ -289,20 +289,4 @@ public void install() { createUser(); installLatest(); } - - public int validateSchema() { - return new MetastoreSchemaTool().setVerbose(verbose).run(buildArray( - "-validate", - "-dbType", - getDbType(), - "-userName", - getHiveUser(), - "-passWord", - getHivePassword(), - "-url", - getJdbcUrl(), - "-driver", - getJdbcDriver() - )); - } } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/Oracle.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/Oracle.java index 21c5de1fb9..0b070e19ac 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/Oracle.java +++ 
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/Oracle.java @@ -24,7 +24,7 @@ @Override public String getDockerImageName() { - return "pvargacl/oracle-xe-18.4.0"; + return "orangehrm/oracle-xe-11g"; } @Override @@ -32,6 +32,10 @@ public String getDockerImageName() { return buildArray( "-p", "1521:1521", + "-e", + "DEFAULT_SYS_PASS=" + getDbRootPassword(), + "-e", + "ORACLE_ALLOW_REMOTE=true", "-d" ); } @@ -68,16 +72,11 @@ public String getInitialJdbcUrl() { @Override public boolean isContainerReady(String logOutput) { - return logOutput.contains("DATABASE IS READY TO USE!"); + return logOutput.contains("Oracle started successfully!"); } @Override public String getHivePassword() { return HIVE_PASSWORD; } - - @Override - public String getHiveUser() { - return "c##"+ super.getHiveUser(); - } } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestOpenTxn.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestOpenTxn.java deleted file mode 100644 index 8e1794a8a6..0000000000 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestOpenTxn.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore.txn; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.api.CommitTxnRequest; -import org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.OpenTxnRequest; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf; -import org.apache.hadoop.hive.metastore.datasource.DataSourceProvider; -import org.apache.hadoop.hive.metastore.datasource.DataSourceProviderFactory; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import javax.sql.DataSource; -import java.sql.Connection; -import java.sql.SQLException; -import java.sql.Statement; - -/** - * Test openTxn and getOpenTxnList calls on TxnStore. - */ -public class TestOpenTxn { - - private Configuration conf = MetastoreConf.newMetastoreConf(); - private TxnStore txnHandler; - - @Before - public void setUp() throws Exception { - // This will init the metastore db - txnHandler = TxnUtils.getTxnStore(conf); - TxnDbUtil.prepDb(conf); - } - - @After - public void tearDown() throws Exception { - TxnDbUtil.cleanDb(conf); - } - - @Test - public void testSingleOpen() throws MetaException { - OpenTxnRequest openTxnRequest = new OpenTxnRequest(1, "me", "localhost"); - long txnId = txnHandler.openTxns(openTxnRequest).getTxn_ids().get(0); - Assert.assertEquals(1, txnId); - } - - @Test - public void testGap() throws Exception { - OpenTxnRequest openTxnRequest = new OpenTxnRequest(1, "me", "localhost"); - txnHandler.openTxns(openTxnRequest); - long second = txnHandler.openTxns(openTxnRequest).getTxn_ids().get(0); - deleteTransaction(second); - txnHandler.openTxns(openTxnRequest); - GetOpenTxnsResponse openTxns = txnHandler.getOpenTxns(); - Assert.assertEquals(3, 
openTxns.getOpen_txnsSize()); - - } - - @Test - public void testGapWithOldOpen() throws Exception { - OpenTxnRequest openTxnRequest = new OpenTxnRequest(1, "me", "localhost"); - txnHandler.openTxns(openTxnRequest); - Thread.sleep(1000); - long second = txnHandler.openTxns(openTxnRequest).getTxn_ids().get(0); - deleteTransaction(second); - txnHandler.openTxns(openTxnRequest); - GetOpenTxnsResponse openTxns = txnHandler.getOpenTxns(); - Assert.assertEquals(3, openTxns.getOpen_txnsSize()); - } - - @Test - public void testGapWithOldCommit() throws Exception { - OpenTxnRequest openTxnRequest = new OpenTxnRequest(1, "me", "localhost"); - long first = txnHandler.openTxns(openTxnRequest).getTxn_ids().get(0); - txnHandler.commitTxn(new CommitTxnRequest(first)); - long second = txnHandler.openTxns(openTxnRequest).getTxn_ids().get(0); - deleteTransaction(second); - txnHandler.openTxns(openTxnRequest); - GetOpenTxnsResponse openTxns = txnHandler.getOpenTxns(); - Assert.assertEquals(2, openTxns.getOpen_txnsSize()); - } - - @Test - public void testMultiGapWithOldCommit() throws Exception { - OpenTxnRequest openTxnRequest = new OpenTxnRequest(1, "me", "localhost"); - long first = txnHandler.openTxns(openTxnRequest).getTxn_ids().get(0); - txnHandler.commitTxn(new CommitTxnRequest(first)); - long second = txnHandler.openTxns(new OpenTxnRequest(10, "me", "localhost")).getTxn_ids().get(0); - deleteTransaction(second, second + 9); - txnHandler.openTxns(openTxnRequest); - GetOpenTxnsResponse openTxns = txnHandler.getOpenTxns(); - Assert.assertEquals(11, openTxns.getOpen_txnsSize()); - } - - private void deleteTransaction(long txnId) throws SQLException { - deleteTransaction(txnId, txnId); - } - - private void deleteTransaction(long minTxnId, long maxTxnId) throws SQLException { - DataSourceProvider dsp = DataSourceProviderFactory.tryGetDataSourceProviderOrNull(conf); - DataSource ds = dsp.create(conf); - Connection dbConn = ds.getConnection(); - Statement stmt = 
dbConn.createStatement(); - stmt.executeUpdate("DELETE FROM TXNS WHERE TXN_ID >=" + minTxnId + " AND TXN_ID <=" + maxTxnId); - dbConn.commit(); - stmt.close(); - dbConn.close(); - } - -} diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java index 0e678d30ba..4a44345217 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -21,7 +21,6 @@ import java.io.DataOutput; import java.io.IOException; -import org.apache.hadoop.hive.ql.io.filter.MutableFilterContext; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; @@ -32,7 +31,7 @@ * The major fields are public by design to allow fast and convenient * access by the vectorized query execution code. */ -public class VectorizedRowBatch implements Writable, MutableFilterContext { +public class VectorizedRowBatch implements Writable { public int numCols; // number of columns public ColumnVector[] cols; // a vector for each column public int size; // number of rows that qualify (i.e. 
haven't been filtered out) @@ -339,7 +338,6 @@ public void write(DataOutput arg0) throws IOException { * - resets each column * - inits each column */ - @Override public void reset() { selectedInUse = false; size = 0; @@ -360,64 +358,5 @@ public void ensureSize(int rows) { for (ColumnVector col : cols) { col.ensureSize(rows, false); } - updateSelected(rows); - } - - @Override - public boolean isSelectedInUse() { - return selectedInUse; - } - - @Override - public int[] getSelected() { - return selected; - } - - @Override - public int getSelectedSize() { - return size; - } - - @Override - public void setFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize) { - this.selectedInUse = isSelectedInUse; - this.selected = selected; - this.size = selectedSize; - // Avoid selected.length < selectedSize since we can borrow a larger array for selected - // Debug loop for selected array: use without assert when needed (asserts only fail in testing) - assert validateSelected() : "Selected array may not contain duplicates or unordered values"; - } - - @Override - public boolean validateSelected() { - for (int i = 1; i < size; i++) { - if (selected[i-1] >= selected[i]) { - return false; - } - } - return true; - } - - @Override - public int[] updateSelected(int minCapacity) { - if (selected == null || selected.length < minCapacity) { - selected = new int[minCapacity]; - } - return selected; - } - - @Override - public void setSelectedInUse(boolean selectedInUse) { - this.selectedInUse = selectedInUse; - } - - @Override - public void setSelected(int[] selectedArray) { - selected = selectedArray; - } - - @Override - public void setSelectedSize(int selectedSize) { - size = selectedSize; } } diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java index dcf7b8be7b..d799d6fc8d 100644 --- 
a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java @@ -26,29 +26,47 @@ * actually selected any rows. * */ -public interface FilterContext { +public abstract class FilterContext { + + protected boolean currBatchIsSelectedInUse = false; + protected int[] currBatchSelected = null; + protected int currBatchSelectedSize = 0; + + public FilterContext() { + super(); + } /** * Reset FilterContext variables. */ - void reset(); + public void resetFilterContext() { + this.currBatchIsSelectedInUse = false; + this.currBatchSelected = null; + this.currBatchSelectedSize = 0; + } /** * Is the filter applied? * @return true if the filter is actually applied */ - boolean isSelectedInUse(); + public boolean isSelectedInUse() { + return this.currBatchIsSelectedInUse; + } /** * Return an int array with the rows that pass the filter. * Do not modify the array returned! * @return int array */ - int[] getSelected(); + public int[] getSelected() { + return this.currBatchSelected; + } /** * Return the number of rows that pass the filter. * @return an int */ - int getSelectedSize(); + public int getSelectedSize() { + return this.currBatchSelectedSize; + } } diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java index 55a4cc73d7..73ed766264 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.io.filter; +import java.util.Arrays; + /** * A representation of a Filter applied on the rows of a VectorizedRowBatch * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}. @@ -26,7 +28,7 @@ * actually selected any rows. 
* */ -public interface MutableFilterContext extends FilterContext { +public class MutableFilterContext extends FilterContext { /** * Set context with the given values by reference. @@ -34,29 +36,71 @@ * @param selected an array of the selected rows * @param selectedSize the number of the selected rows */ - void setFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize); + public void setFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize) { + this.currBatchIsSelectedInUse = isSelectedInUse; + this.currBatchSelected = selected; + this.currBatchSelectedSize = selectedSize; + // Avoid selected.length < selectedSize since we can borrow a larger array for selected + // Debug loop for selected array: use without assert when needed (asserts only fail in testing) + assert isValidSelected() : "Selected array may not contain duplicates or unordered values"; + } + + /** + * Copy context variables from the a given FilterContext. + * Always does a deep copy of the data. + * @param other FilterContext to copy from + */ + public void copyFilterContextFrom(MutableFilterContext other) { + // assert if copying into self (can fail only in testing) + assert this != other: "May not copy a FilterContext to itself"; + + if (this != other) { + if (this.currBatchSelected == null || this.currBatchSelected.length < other.currBatchSelectedSize) { + // note: still allocating a full size buffer, for later use + this.currBatchSelected = Arrays.copyOf(other.currBatchSelected, other.currBatchSelected.length); + } else { + System.arraycopy(other.currBatchSelected, 0, this.currBatchSelected, 0, other.currBatchSelectedSize); + } + this.currBatchSelectedSize = other.currBatchSelectedSize; + this.currBatchIsSelectedInUse = other.currBatchIsSelectedInUse; + } + } /** * Validate method checking if existing selected array contains accepted values. 
* Values should be in order and without duplicates i.e [1,1,1] is illegal * @return true if the selected array is valid */ - boolean validateSelected(); + public boolean isValidSelected() { + for (int i = 1; i < this.currBatchSelectedSize; i++) { + if (this.currBatchSelected[i-1] >= this.currBatchSelected[i]) { + return false; + } + } + return true; + } /** - * Get an array for selected that is expected to be modified. + * Borrow the current selected array to be modified if it satisfies minimum capacity. * If it is too small or unset, allocates one. * This method never returns null! * @param minCapacity * @return the current selected array to be modified */ - int[] updateSelected(int minCapacity); + public int[] borrowSelected(int minCapacity) { + int[] existing = this.currBatchSelected; + this.currBatchSelected = null; + if (existing == null || existing.length < minCapacity) { + return new int[minCapacity]; + } + return existing; + } /** * Get the immutable version of the current FilterContext. * @return immutable FilterContext instance */ - default FilterContext immutable() { + public FilterContext immutable(){ return this; } @@ -64,17 +108,23 @@ default FilterContext immutable() { * Set the selectedInUse boolean showing if the filter is applied. * @param selectedInUse */ - void setSelectedInUse(boolean selectedInUse); + public void setSelectedInUse(boolean selectedInUse) { + this.currBatchIsSelectedInUse = selectedInUse; + } /** * Set the array of the rows that pass the filter by reference. * @param selectedArray */ - void setSelected(int[] selectedArray); + public void setSelected(int[] selectedArray) { + this.currBatchSelected = selectedArray; + } /** * Set the number of the rows that pass the filter. * @param selectedSize */ - void setSelectedSize(int selectedSize); + public void setSelectedSize(int selectedSize) { + this.currBatchSelectedSize = selectedSize; + } }