diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 75b77072c6..3be5a8dee4 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3052,10 +3052,6 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "hive.tez.exec.print.summary", false, "Display breakdown of execution steps, for every query executed by the shell."), - TEZ_SESSION_EVENTS_SUMMARY( - "hive.tez.session.events.print.summary", - "none", new StringSet("none", "text", "json"), - "Display summary of all tez sessions related events in text or json format"), TEZ_EXEC_INPLACE_PROGRESS( "hive.tez.exec.inplace.progress", true, diff --git a/itests/hive-unit/pom.xml b/itests/hive-unit/pom.xml index 3a435a8a52..ea5b7b9480 100644 --- a/itests/hive-unit/pom.xml +++ b/itests/hive-unit/pom.xml @@ -361,23 +361,7 @@ ${tez.version} test - - org.slf4j - slf4j-log4j12 - - - commmons-logging - commons-logging - - - - - org.apache.tez - tez-runtime-internals - ${tez.version} - test - - + org.slf4j slf4j-log4j12 diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractJdbcTriggersTest.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractJdbcTriggersTest.java index 62ee66f717..235e6c36ed 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractJdbcTriggersTest.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractJdbcTriggersTest.java @@ -21,9 +21,7 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import java.io.ByteArrayOutputStream; import java.io.File; -import java.io.PrintStream; import java.net.URL; import java.sql.Connection; import java.sql.SQLException; @@ -105,7 +103,7 @@ public static void afterTest() throws Exception { } } - private void createSleepUDF() throws SQLException { + void createSleepUDF() throws SQLException { String udfName = TestJdbcWithMiniHS2.SleepMsUDF.class.getName(); Connection con = hs2Conn; Statement stmt = con.createStatement(); @@ -114,65 +112,40 @@ private void createSleepUDF() throws SQLException { } void runQueryWithTrigger(final String query, final List setCmds, - final String expect) throws Exception { - runQueryWithTrigger(query, setCmds, expect, null); - } - - void runQueryWithTrigger(final String query, final List setCmds, - final String expect, final List errCaptureExpect) + final String expect) throws Exception { Connection con = hs2Conn; TestJdbcWithMiniLlap.createTestTable(con, null, tableName, kvDataFilePath.toString()); createSleepUDF(); - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - System.setErr(new PrintStream(baos)); // capture stderr final Statement selStmt = con.createStatement(); final Throwable[] throwable = new Throwable[1]; - try { - Thread queryThread = new Thread(() -> { - try { - if (setCmds != null) { - for (String setCmd : setCmds) { - selStmt.execute(setCmd); - } + Thread queryThread = new Thread(() -> { + try { + if (setCmds != null) { + for (String setCmd : setCmds) { + selStmt.execute(setCmd); } - selStmt.execute(query); - } catch (SQLException e) { - throwable[0] = e; - } - }); - queryThread.start(); - - queryThread.join(); - selStmt.close(); - - if (expect == null) { - assertNull("Expected query to succeed", throwable[0]); - } else { - assertNotNull("Expected non-null throwable", throwable[0]); - assertEquals(SQLException.class, throwable[0].getClass()); - assertTrue(expect 
+ " is not contained in " + throwable[0].getMessage(), - throwable[0].getMessage().contains(expect)); - } - - if (errCaptureExpect != null && !errCaptureExpect.isEmpty()) { - // failure hooks are run after HiveStatement is closed. wait sometime for failure hook to execute - String stdErrStr = ""; - while (!stdErrStr.contains(errCaptureExpect.get(0))) { - baos.flush(); - stdErrStr = baos.toString(); - Thread.sleep(500); - } - for (String errExpect : errCaptureExpect) { - assertTrue("'" + errExpect + "' expected in STDERR capture, but not found.", stdErrStr.contains(errExpect)); } + selStmt.execute(query); + } catch (SQLException e) { + throwable[0] = e; } - } finally { - baos.close(); + }); + queryThread.start(); + + queryThread.join(); + selStmt.close(); + + if (expect == null) { + assertNull("Expected query to succeed", throwable[0]); + } else { + assertNotNull("Expected non-null throwable", throwable[0]); + assertEquals(SQLException.class, throwable[0].getClass()); + assertTrue(expect + " is not contained in " + throwable[0].getMessage(), + throwable[0].getMessage().contains(expect)); } - } abstract void setupTriggers(final List triggers) throws Exception; diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestTriggersMoveWorkloadManager.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestTriggersMoveWorkloadManager.java index 74ca958ea8..a983855667 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestTriggersMoveWorkloadManager.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestTriggersMoveWorkloadManager.java @@ -18,9 +18,16 @@ import static org.apache.hadoop.hive.ql.exec.tez.TestWorkloadManager.plan; import static org.apache.hadoop.hive.ql.exec.tez.TestWorkloadManager.pool; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import java.io.File; import java.net.URL; +import java.sql.Connection; +import java.sql.SQLException; +import java.sql.Statement; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -87,30 +94,10 @@ public void testTriggerMoveAndKill() throws Exception { new Action(Action.Type.MOVE_TO_POOL, "ETL")); Trigger killTrigger = new ExecutionTrigger("slow_query_kill", killExpression, new Action(Action.Type.KILL_QUERY)); - setupTriggers(Lists.newArrayList(moveTrigger), Lists.newArrayList(killTrigger)); + setupTriggers(Lists.newArrayList(moveTrigger, killTrigger), Lists.newArrayList(killTrigger)); String query = "select sleep(t1.under_col, 5), t1.value from " + tableName + " t1 join " + tableName + " t2 on t1.under_col>=t2.under_col"; - List setCmds = new ArrayList<>(); - setCmds.add("set hive.tez.session.events.print.summary=json"); - setCmds.add("set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecWMEventsSummaryPrinter"); - setCmds.add("set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.PostExecWMEventsSummaryPrinter"); - List errCaptureExpect = new ArrayList<>(); - errCaptureExpect.add("Workload Manager Events Summary"); - errCaptureExpect.add("Event: GET Pool: BI Cluster %: 80.00"); - errCaptureExpect.add("Event: MOVE Pool: ETL Cluster %: 20.00"); - errCaptureExpect.add("Event: KILL Pool: null Cluster %: 0.00"); - errCaptureExpect.add("Event: RETURN Pool: null Cluster %: 0.00"); - errCaptureExpect.add("\"eventType\" : \"GET\""); - errCaptureExpect.add("\"eventType\" : \"MOVE\""); - errCaptureExpect.add("\"eventType\" : \"KILL\""); - 
errCaptureExpect.add("\"eventType\" : \"RETURN\""); - errCaptureExpect.add("\"name\" : \"slow_query_move\""); - errCaptureExpect.add("\"name\" : \"slow_query_kill\""); - // violation in BI queue - errCaptureExpect.add("\"violationMsg\" : \"Trigger " + moveTrigger + " violated"); - // violation in ETL queue - errCaptureExpect.add("\"violationMsg\" : \"Trigger " + killTrigger + " violated"); - runQueryWithTrigger(query, setCmds, killTrigger + " violated", errCaptureExpect); + runQueryWithTrigger(query, null, killTrigger + " violated"); } @Test(timeout = 60000) @@ -124,65 +111,7 @@ public void testTriggerMoveEscapeKill() throws Exception { setupTriggers(Lists.newArrayList(moveTrigger, killTrigger), Lists.newArrayList()); String query = "select sleep(t1.under_col, 1), t1.value from " + tableName + " t1 join " + tableName + " t2 on t1.under_col==t2.under_col"; - List setCmds = new ArrayList<>(); - setCmds.add("set hive.tez.session.events.print.summary=json"); - setCmds.add("set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecWMEventsSummaryPrinter"); - setCmds.add("set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.PostExecWMEventsSummaryPrinter"); - List errCaptureExpect = new ArrayList<>(); - errCaptureExpect.add("Workload Manager Events Summary"); - errCaptureExpect.add("Event: GET Pool: BI Cluster %: 80.00"); - errCaptureExpect.add("Event: MOVE Pool: ETL Cluster %: 20.00"); - errCaptureExpect.add("Event: RETURN Pool: null Cluster %: 0.00"); - errCaptureExpect.add("\"eventType\" : \"GET\""); - errCaptureExpect.add("\"eventType\" : \"MOVE\""); - errCaptureExpect.add("\"eventType\" : \"RETURN\""); - errCaptureExpect.add("\"name\" : \"move_big_read\""); - errCaptureExpect.add("\"name\" : \"slow_query_kill\""); - // violation in BI queue - errCaptureExpect.add("\"violationMsg\" : \"Trigger " + moveTrigger + " violated"); - runQueryWithTrigger(query, setCmds, null, errCaptureExpect); - } - - @Test(timeout = 60000) - public void testTriggerMoveBackKill() throws Exception { - Expression moveExpression1 = ExpressionFactory.fromString("HDFS_BYTES_READ > 100"); - Expression moveExpression2 = ExpressionFactory.fromString("SHUFFLE_BYTES > 200"); - Expression killExpression = ExpressionFactory.fromString("EXECUTION_TIME > 2000"); - Trigger moveTrigger1 = new ExecutionTrigger("move_big_read", moveExpression1, - new Action(Action.Type.MOVE_TO_POOL, "ETL")); - Trigger moveTrigger2 = new ExecutionTrigger("move_high", moveExpression2, - new Action(Action.Type.MOVE_TO_POOL, "BI")); - Trigger killTrigger = new ExecutionTrigger("slow_query_kill", killExpression, - new Action(Action.Type.KILL_QUERY)); - setupTriggers(Lists.newArrayList(moveTrigger1, killTrigger), Lists.newArrayList(moveTrigger2)); - String query = "select sleep(t1.under_col, 1), t1.value from " + tableName + " t1 join " + tableName + - " t2 on t1.under_col>=t2.under_col"; - List setCmds = new ArrayList<>(); - setCmds.add("set hive.tez.session.events.print.summary=json"); - setCmds.add("set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecWMEventsSummaryPrinter"); - setCmds.add("set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.PostExecWMEventsSummaryPrinter"); - List errCaptureExpect = new ArrayList<>(); - errCaptureExpect.add("Workload Manager Events Summary"); - errCaptureExpect.add("Event: GET Pool: BI Cluster %: 80.00"); - errCaptureExpect.add("Event: MOVE Pool: ETL Cluster %: 20.00"); - errCaptureExpect.add("Event: MOVE Pool: BI Cluster %: 80.00"); - errCaptureExpect.add("Event: KILL Pool: null 
Cluster %: 0.00"); - errCaptureExpect.add("Event: RETURN Pool: null Cluster %: 0.00"); - errCaptureExpect.add("\"eventType\" : \"GET\""); - errCaptureExpect.add("\"eventType\" : \"MOVE\""); - errCaptureExpect.add("\"eventType\" : \"MOVE\""); - errCaptureExpect.add("\"eventType\" : \"KILL\""); - errCaptureExpect.add("\"eventType\" : \"RETURN\""); - errCaptureExpect.add("\"name\" : \"move_big_read\""); - errCaptureExpect.add("\"name\" : \"slow_query_kill\""); - errCaptureExpect.add("\"name\" : \"move_high\""); - // violation in BI queue - errCaptureExpect.add("\"violationMsg\" : \"Trigger " + moveTrigger1 + " violated"); - // violation in ETL queue - errCaptureExpect.add("\"violationMsg\" : \"Trigger " + moveTrigger2 + " violated"); - // violation in BI queue - errCaptureExpect.add("\"violationMsg\" : \"Trigger " + killTrigger + " violated"); - runQueryWithTrigger(query, setCmds, killTrigger + " violated", errCaptureExpect); + runQueryWithTrigger(query, null, null); } @Test(timeout = 60000) @@ -196,23 +125,7 @@ public void testTriggerMoveConflictKill() throws Exception { setupTriggers(Lists.newArrayList(moveTrigger, killTrigger), Lists.newArrayList()); String query = "select sleep(t1.under_col, 5), t1.value from " + tableName + " t1 join " + tableName + " t2 on t1.under_col>=t2.under_col"; - List setCmds = new ArrayList<>(); - setCmds.add("set hive.tez.session.events.print.summary=json"); - setCmds.add("set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecWMEventsSummaryPrinter"); - setCmds.add("set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.PostExecWMEventsSummaryPrinter"); - List errCaptureExpect = new ArrayList<>(); - errCaptureExpect.add("Workload Manager Events Summary"); - errCaptureExpect.add("Event: GET Pool: BI Cluster %: 80.00"); - errCaptureExpect.add("Event: KILL Pool: null Cluster %: 0.00"); - errCaptureExpect.add("Event: RETURN Pool: null Cluster %: 0.00"); - errCaptureExpect.add("\"eventType\" : \"GET\""); - errCaptureExpect.add("\"eventType\" : \"KILL\""); - errCaptureExpect.add("\"eventType\" : \"RETURN\""); - errCaptureExpect.add("\"name\" : \"move_big_read\""); - errCaptureExpect.add("\"name\" : \"kill_big_read\""); - // violation in BI queue - errCaptureExpect.add("\"violationMsg\" : \"Trigger " + killTrigger + " violated"); - runQueryWithTrigger(query, setCmds, killTrigger + " violated", errCaptureExpect); + runQueryWithTrigger(query, null, killTrigger + " violated"); } @Override diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index cca1055fde..c91071234c 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -754,6 +754,7 @@ minillaplocal.query.files=\ vector_between_in.q,\ offset_limit_ppd_optimizer.q,\ cluster.q,\ + subquery_corr.q,\ subquery_in.q,\ subquery_multi.q,\ subquery_scalar.q,\ diff --git a/metastore/scripts/upgrade/mysql/046-HIVE-17566.mysql.sql b/metastore/scripts/upgrade/mysql/046-HIVE-17566.mysql.sql index 02288cbe42..34fcfe68fb 100644 --- a/metastore/scripts/upgrade/mysql/046-HIVE-17566.mysql.sql +++ b/metastore/scripts/upgrade/mysql/046-HIVE-17566.mysql.sql @@ -5,7 +5,7 @@ CREATE TABLE IF NOT EXISTS WM_RESOURCEPLAN ( `STATUS` varchar(20) NOT NULL, `DEFAULT_POOL_ID` bigint(20), PRIMARY KEY (`RP_ID`), - UNIQUE KEY `UNIQUE_WM_RESOURCEPLAN` (`NAME`) + KEY `UNIQUE_WM_RESOURCEPLAN` (`NAME`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; CREATE TABLE IF NOT EXISTS WM_POOL @@ -17,7 +17,7 @@ 
CREATE TABLE IF NOT EXISTS WM_POOL `QUERY_PARALLELISM` int(11), `SCHEDULING_POLICY` varchar(767), PRIMARY KEY (`POOL_ID`), - UNIQUE KEY `UNIQUE_WM_POOL` (`RP_ID`, `PATH`), + KEY `UNIQUE_WM_POOL` (`RP_ID`, `PATH`), CONSTRAINT `WM_POOL_FK1` FOREIGN KEY (`RP_ID`) REFERENCES `WM_RESOURCEPLAN` (`RP_ID`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; @@ -31,7 +31,7 @@ CREATE TABLE IF NOT EXISTS WM_TRIGGER `TRIGGER_EXPRESSION` varchar(1024), `ACTION_EXPRESSION` varchar(1024), PRIMARY KEY (`TRIGGER_ID`), - UNIQUE KEY `UNIQUE_WM_TRIGGER` (`RP_ID`, `NAME`), + KEY `UNIQUE_WM_TRIGGER` (`RP_ID`, `NAME`), CONSTRAINT `WM_TRIGGER_FK1` FOREIGN KEY (`RP_ID`) REFERENCES `WM_RESOURCEPLAN` (`RP_ID`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; @@ -53,7 +53,7 @@ CREATE TABLE IF NOT EXISTS WM_MAPPING `POOL_ID` bigint(20), `ORDERING` int, PRIMARY KEY (`MAPPING_ID`), - UNIQUE KEY `UNIQUE_WM_MAPPING` (`RP_ID`, `ENTITY_TYPE`, `ENTITY_NAME`), + KEY `UNIQUE_WM_MAPPING` (`RP_ID`, `ENTITY_TYPE`, `ENTITY_NAME`), CONSTRAINT `WM_MAPPING_FK1` FOREIGN KEY (`RP_ID`) REFERENCES `WM_RESOURCEPLAN` (`RP_ID`), CONSTRAINT `WM_MAPPING_FK2` FOREIGN KEY (`POOL_ID`) REFERENCES `WM_POOL` (`POOL_ID`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; diff --git a/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql index 915af8bf4b..ec95c1734a 100644 --- a/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql +++ b/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql @@ -856,7 +856,7 @@ CREATE TABLE IF NOT EXISTS WM_RESOURCEPLAN ( `STATUS` varchar(20) NOT NULL, `DEFAULT_POOL_ID` bigint(20), PRIMARY KEY (`RP_ID`), - UNIQUE KEY `UNIQUE_WM_RESOURCEPLAN` (`NAME`) + KEY `UNIQUE_WM_RESOURCEPLAN` (`NAME`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; CREATE TABLE IF NOT EXISTS WM_POOL @@ -868,7 +868,7 @@ CREATE TABLE IF NOT EXISTS WM_POOL `QUERY_PARALLELISM` int(11), `SCHEDULING_POLICY` varchar(767), PRIMARY KEY (`POOL_ID`), - UNIQUE KEY `UNIQUE_WM_POOL` (`RP_ID`, `PATH`), + KEY `UNIQUE_WM_POOL` (`RP_ID`, `PATH`), CONSTRAINT `WM_POOL_FK1` FOREIGN KEY (`RP_ID`) REFERENCES `WM_RESOURCEPLAN` (`RP_ID`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; @@ -882,7 +882,7 @@ CREATE TABLE IF NOT EXISTS WM_TRIGGER `TRIGGER_EXPRESSION` varchar(1024), `ACTION_EXPRESSION` varchar(1024), PRIMARY KEY (`TRIGGER_ID`), - UNIQUE KEY `UNIQUE_WM_TRIGGER` (`RP_ID`, `NAME`), + KEY `UNIQUE_WM_TRIGGER` (`RP_ID`, `NAME`), CONSTRAINT `WM_TRIGGER_FK1` FOREIGN KEY (`RP_ID`) REFERENCES `WM_RESOURCEPLAN` (`RP_ID`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; @@ -904,7 +904,7 @@ CREATE TABLE IF NOT EXISTS WM_MAPPING `POOL_ID` bigint(20), `ORDERING` int, PRIMARY KEY (`MAPPING_ID`), - UNIQUE KEY `UNIQUE_WM_MAPPING` (`RP_ID`, `ENTITY_TYPE`, `ENTITY_NAME`), + KEY `UNIQUE_WM_MAPPING` (`RP_ID`, `ENTITY_TYPE`, `ENTITY_NAME`), CONSTRAINT `WM_MAPPING_FK1` FOREIGN KEY (`RP_ID`) REFERENCES `WM_RESOURCEPLAN` (`RP_ID`), CONSTRAINT `WM_MAPPING_FK2` FOREIGN KEY (`POOL_ID`) REFERENCES `WM_POOL` (`POOL_ID`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index 6d48783d48..57e1803805 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -59,7 +59,7 @@ import org.apache.hadoop.hive.ql.parse.QB; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.wm.WmContext; +import 
org.apache.hadoop.hive.ql.wm.TriggerContext; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; @@ -150,18 +150,18 @@ */ private Map insertBranchToNamePrefix = new HashMap<>(); private Operation operation = Operation.OTHER; - private WmContext wmContext; + private TriggerContext triggerContext; public void setOperation(Operation operation) { this.operation = operation; } - public WmContext getWmContext() { - return wmContext; + public TriggerContext getTriggerContext() { + return triggerContext; } - public void setWmContext(final WmContext wmContext) { - this.wmContext = wmContext; + public void setTriggerContext(final TriggerContext triggerContext) { + this.triggerContext = triggerContext; } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index 4d52d748f1..389a1a6c0b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -113,7 +113,7 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; -import org.apache.hadoop.hive.ql.wm.WmContext; +import org.apache.hadoop.hive.ql.wm.TriggerContext; import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.JobClient; @@ -732,8 +732,8 @@ private void setTriggerContext(final String queryId) { } else { queryStartTime = queryDisplay.getQueryStartTime(); } - WmContext wmContext = new WmContext(queryStartTime, queryId); - ctx.setWmContext(wmContext); + TriggerContext triggerContext = new TriggerContext(queryStartTime, queryId); + ctx.setTriggerContext(triggerContext); } private boolean startImplicitTxn(HiveTxnManager txnManager) throws LockException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/AmPluginNode.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/AmPluginNode.java index eb6442180b..0509cbc77e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/AmPluginNode.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/AmPluginNode.java @@ -18,21 +18,18 @@ package org.apache.hadoop.hive.ql.exec.tez; -import org.apache.hadoop.security.token.Token; import org.apache.hive.common.util.Ref; + +import java.util.concurrent.TimeoutException; + +import org.apache.hadoop.security.token.Token; import org.apache.tez.common.security.JobTokenIdentifier; -import org.codehaus.jackson.annotate.JsonIgnore; -import org.codehaus.jackson.annotate.JsonProperty; -import org.codehaus.jackson.map.annotate.JsonSerialize; -@JsonSerialize public interface AmPluginNode { - class AmPluginInfo { - @JsonProperty("amPluginPort") + public static class AmPluginInfo { public final int amPluginPort; public final Token amPluginToken; public final String amPluginTokenJobId; - @JsonProperty("amHost") public final String amHost; AmPluginInfo(String amHost, int amPluginPort, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KillMoveTriggerActionHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KillMoveTriggerActionHandler.java index b16f1c30a0..94b189bdd5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KillMoveTriggerActionHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KillMoveTriggerActionHandler.java @@ -21,12 +21,13 @@ import java.util.concurrent.ExecutionException; 
import java.util.concurrent.Future; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.wm.Trigger; import org.apache.hadoop.hive.ql.wm.TriggerActionHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class KillMoveTriggerActionHandler implements TriggerActionHandler { +public class KillMoveTriggerActionHandler implements TriggerActionHandler { private static final Logger LOG = LoggerFactory.getLogger(KillMoveTriggerActionHandler.class); private final WorkloadManager wm; @@ -35,20 +36,31 @@ } @Override - public void applyAction(final Map queriesViolated) { - Map> moveFutures = new HashMap<>(); - Map> killFutures = new HashMap<>(); - for (Map.Entry entry : queriesViolated.entrySet()) { - WmTezSession wmTezSession = entry.getKey(); + public void applyAction(final Map queriesViolated) { + TezSessionState sessionState; + Map> moveFutures = new HashMap<>(queriesViolated.size()); + for (Map.Entry entry : queriesViolated.entrySet()) { switch (entry.getValue().getAction().getType()) { case KILL_QUERY: - Future killFuture = wm.applyKillSessionAsync(wmTezSession, entry.getValue().getViolationMsg()); - killFutures.put(wmTezSession, killFuture); + sessionState = entry.getKey(); + String queryId = sessionState.getTriggerContext().getQueryId(); + try { + sessionState.getKillQuery().killQuery(queryId, entry.getValue().getViolationMsg()); + } catch (HiveException e) { + LOG.warn("Unable to kill query {} for trigger violation", queryId); + } break; case MOVE_TO_POOL: - String destPoolName = entry.getValue().getAction().getPoolName(); - Future moveFuture = wm.applyMoveSessionAsync(wmTezSession, destPoolName); - moveFutures.put(wmTezSession, moveFuture); + sessionState = entry.getKey(); + if (sessionState instanceof WmTezSession) { + WmTezSession wmTezSession = (WmTezSession) sessionState; + String destPoolName = entry.getValue().getAction().getPoolName(); + Future moveFuture = wm.applyMoveSessionAsync(wmTezSession, destPoolName); + moveFutures.put(wmTezSession, moveFuture); + } else { + throw new RuntimeException("WmTezSession is expected. Got: " + sessionState.getClass().getSimpleName() + + ". 
SessionId: " + sessionState.getSessionId()); + } break; default: throw new RuntimeException("Unsupported action: " + entry.getValue()); @@ -57,28 +69,15 @@ public void applyAction(final Map queriesViolated) { for (Map.Entry> entry : moveFutures.entrySet()) { WmTezSession wmTezSession = entry.getKey(); - Future future = entry.getValue(); + Future moveFuture = entry.getValue(); try { // block to make sure move happened successfully - if (future.get()) { + if (moveFuture.get()) { LOG.info("Moved session {} to pool {}", wmTezSession.getSessionId(), wmTezSession.getPoolName()); } } catch (InterruptedException | ExecutionException e) { LOG.error("Exception while moving session {}", wmTezSession.getSessionId(), e); } } - - for (Map.Entry> entry : killFutures.entrySet()) { - WmTezSession wmTezSession = entry.getKey(); - Future future = entry.getValue(); - try { - // block to make sure kill happened successfully - if (future.get()) { - LOG.info("Killed session {}", wmTezSession.getSessionId()); - } - } catch (InterruptedException | ExecutionException e) { - LOG.error("Exception while killing session {}", wmTezSession.getSessionId(), e); - } - } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KillTriggerActionHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KillTriggerActionHandler.java index 50d234deaa..8c60b6f1bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KillTriggerActionHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KillTriggerActionHandler.java @@ -28,7 +28,7 @@ /** * Handles only Kill Action. */ -public class KillTriggerActionHandler implements TriggerActionHandler { +public class KillTriggerActionHandler implements TriggerActionHandler { private static final Logger LOG = LoggerFactory.getLogger(KillTriggerActionHandler.class); @Override @@ -37,7 +37,7 @@ public void applyAction(final Map queriesViolated) { switch (entry.getValue().getAction().getType()) { case KILL_QUERY: TezSessionState sessionState = entry.getKey(); - String queryId = sessionState.getWmContext().getQueryId(); + String queryId = sessionState.getTriggerContext().getQueryId(); try { KillQuery killQuery = sessionState.getKillQuery(); // if kill query is null then session might have been released to pool or closed already diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java index dd879fc5e8..6fa37244a5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java @@ -38,6 +38,7 @@ import java.util.concurrent.atomic.AtomicReference; import javax.security.auth.login.LoginException; +import org.apache.commons.codec.binary.Hex; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.io.FilenameUtils; import org.apache.hadoop.conf.Configuration; @@ -61,7 +62,7 @@ import org.apache.hadoop.hive.ql.session.KillQuery; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; -import org.apache.hadoop.hive.ql.wm.WmContext; +import org.apache.hadoop.hive.ql.wm.TriggerContext; import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.security.Credentials; @@ -84,9 +85,6 @@ import org.apache.tez.serviceplugins.api.ServicePluginsDescriptor; import org.apache.tez.serviceplugins.api.TaskCommunicatorDescriptor; import 
org.apache.tez.serviceplugins.api.TaskSchedulerDescriptor; -import org.codehaus.jackson.annotate.JsonIgnore; -import org.codehaus.jackson.annotate.JsonProperty; -import org.codehaus.jackson.map.annotate.JsonSerialize; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor; @@ -97,7 +95,6 @@ /** * Holds session state related to Tez */ -@JsonSerialize public class TezSessionState { protected static final Logger LOG = LoggerFactory.getLogger(TezSessionState.class.getName()); @@ -114,24 +111,19 @@ private Future sessionFuture; /** Console used for user feedback during async session opening. */ private LogHelper console; - @JsonProperty("sessionId") private String sessionId; private final DagUtils utils; - @JsonProperty("queueName") private String queueName; - @JsonProperty("defaultQueue") private boolean defaultQueue = false; - @JsonProperty("user") private String user; private AtomicReference ownerThread = new AtomicReference<>(null); private final Set additionalFilesNotFromConf = new HashSet(); private final Set localizedResources = new HashSet(); - @JsonProperty("doAsEnabled") private boolean doAsEnabled; private boolean isLegacyLlapMode; - private WmContext wmContext; + private TriggerContext triggerContext; private KillQuery killQuery; private static final Cache shaCache = CacheBuilder.newBuilder().maximumSize(100).build(); @@ -860,12 +852,12 @@ public void destroy() throws Exception { TezSessionPoolManager.getInstance().destroy(this); } - public WmContext getWmContext() { - return wmContext; + public TriggerContext getTriggerContext() { + return triggerContext; } - public void setWmContext(final WmContext wmContext) { - this.wmContext = wmContext; + public void setTriggerContext(final TriggerContext triggerContext) { + this.triggerContext = triggerContext; } public void setKillQuery(final KillQuery killQuery) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java index 8795cfcee1..af77f300c2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java @@ -26,6 +26,7 @@ import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; @@ -43,6 +44,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.DriverContext; +import org.apache.hadoop.hive.ql.QueryInfo; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Task; @@ -61,7 +63,7 @@ import org.apache.hadoop.hive.ql.plan.UnionWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.wm.WmContext; +import org.apache.hadoop.hive.ql.wm.TriggerContext; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; @@ -144,8 +146,8 @@ public int execute(DriverContext driverContext) { // some DDL task that directly executes a TezTask does not setup Context and hence TriggerContext. // Setting queryId is messed up. Some DDL tasks have executionId instead of proper queryId. 
String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID); - WmContext wmContext = new WmContext(System.currentTimeMillis(), queryId); - ctx.setWmContext(wmContext); + TriggerContext triggerContext = new TriggerContext(System.currentTimeMillis(), queryId); + ctx.setTriggerContext(triggerContext); } // Need to remove this static hack. But this is the way currently to get a session. @@ -156,6 +158,7 @@ public int execute(DriverContext driverContext) { if (session != null && !session.isOpen()) { LOG.warn("The session: " + session + " has not been opened"); } + Set desiredCounters = new HashSet<>(); // We only need a username for UGI to use for groups; getGroups will fetch the groups // based on Hadoop configuration, as documented at // https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html @@ -163,11 +166,15 @@ public int execute(DriverContext driverContext) { MappingInput mi = (userName == null) ? new MappingInput("anonymous", null) : new MappingInput(ss.getUserName(), UserGroupInformation.createRemoteUser(ss.getUserName()).getGroups()); - WmContext wmContext = ctx.getWmContext(); - session = WorkloadManagerFederation.getSession(session, conf, mi, getWork().getLlapMode(), wmContext); + session = WorkloadManagerFederation.getSession( + session, conf, mi, getWork().getLlapMode(), desiredCounters); - LOG.info("Subscribed to counters: {} for queryId: {}", wmContext.getSubscribedCounters(), wmContext.getQueryId()); + TriggerContext triggerContext = ctx.getTriggerContext(); + triggerContext.setDesiredCounters(desiredCounters); + LOG.info("Subscribed to counters: {} for queryId: {}", + desiredCounters, triggerContext.getQueryId()); ss.setTezSession(session); + session.setTriggerContext(triggerContext); try { // jobConf will hold all the configuration for hadoop, tez, and hive JobConf jobConf = utils.createConfiguration(conf); @@ -249,22 +256,12 @@ public int execute(DriverContext driverContext) { // Currently, reopen on an attempted reuse will take care of that; we cannot tell // if the session is usable until we try. // We return this to the pool even if it's unusable; reopen is supposed to handle this. - wmContext = ctx.getWmContext(); try { session.returnToSessionManager(); } catch (Exception e) { LOG.error("Failed to return session: {} to pool", session, e); throw e; } - - if (!conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("none") && - wmContext != null) { - if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("json")) { - wmContext.printJson(console); - } else if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("text")) { - wmContext.print(console); - } - } } if (LOG.isInfoEnabled() && counters != null @@ -588,9 +585,9 @@ DAGClient submit(JobConf conf, DAG dag, Path scratchDir, console.printInfo("Dag submit failed due to " + e.getMessage() + " stack trace: " + Arrays.toString(e.getStackTrace()) + " retrying..."); // TODO: this is temporary, need to refactor how reopen is invoked. - WmContext oldCtx = sessionState.getWmContext(); + TriggerContext oldCtx = sessionState.getTriggerContext(); sessionState = sessionState.reopen(conf, inputOutputJars); - sessionState.setWmContext(oldCtx); + sessionState.setTriggerContext(oldCtx); dagClient = sessionState.getSession().submitDAG(dag); } catch (Exception retryException) { // we failed to submit after retrying. Destroy session and bail. 
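Note on the TezTask.java hunks above: after this patch, trigger counter names flow through an explicit desiredCounters set instead of a WmContext. The sketch below is a reading aid only, not part of the patch; the surrounding identifiers (session, conf, mi, ctx, ss) come from TezTask.execute(), and the generic type parameters are assumptions, since they were lost in the hunk text above.

    // Reading aid, not part of the patch: restored counter wiring in TezTask.execute().
    Set<String> desiredCounters = new HashSet<>();
    // getSession() is expected to populate desiredCounters with the counter names of the
    // triggers attached to whichever pool the query is placed into.
    session = WorkloadManagerFederation.getSession(
        session, conf, mi, getWork().getLlapMode(), desiredCounters);
    // The per-query TriggerContext (created in Driver.setTriggerContext) keeps the counters
    // so TriggerValidatorRunnable can later compare them against the pool's triggers.
    TriggerContext triggerContext = ctx.getTriggerContext();
    triggerContext.setDesiredCounters(desiredCounters);
    ss.setTezSession(session);
    session.setTriggerContext(triggerContext);
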
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TriggerValidatorRunnable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TriggerValidatorRunnable.java index 6414f05fe0..58216595e2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TriggerValidatorRunnable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TriggerValidatorRunnable.java @@ -24,7 +24,7 @@ import org.apache.hadoop.hive.ql.wm.SessionTriggerProvider; import org.apache.hadoop.hive.ql.wm.Trigger; import org.apache.hadoop.hive.ql.wm.TriggerActionHandler; -import org.apache.hadoop.hive.ql.wm.WmContext; +import org.apache.hadoop.hive.ql.wm.TriggerContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,17 +46,17 @@ public void run() { final List sessions = sessionTriggerProvider.getSessions(); final List triggers = sessionTriggerProvider.getTriggers(); for (TezSessionState sessionState : sessions) { - WmContext wmContext = sessionState.getWmContext(); - if (wmContext != null && !wmContext.isQueryCompleted() - && !wmContext.getCurrentCounters().isEmpty()) { - Map currentCounters = wmContext.getCurrentCounters(); + TriggerContext triggerContext = sessionState.getTriggerContext(); + if (triggerContext != null && !triggerContext.isQueryCompleted() + && !triggerContext.getCurrentCounters().isEmpty()) { + Map currentCounters = triggerContext.getCurrentCounters(); for (Trigger currentTrigger : triggers) { String desiredCounter = currentTrigger.getExpression().getCounterLimit().getName(); // there could be interval where desired counter value is not populated by the time we make this check if (currentCounters.containsKey(desiredCounter)) { long currentCounterValue = currentCounters.get(desiredCounter); if (currentTrigger.apply(currentCounterValue)) { - String queryId = sessionState.getWmContext().getQueryId(); + String queryId = sessionState.getTriggerContext().getQueryId(); if (violatedSessions.containsKey(sessionState)) { // session already has a violation Trigger existingTrigger = violatedSessions.get(sessionState); @@ -84,7 +84,7 @@ public void run() { Trigger chosenTrigger = violatedSessions.get(sessionState); if (chosenTrigger != null) { - LOG.info("Query: {}. {}. Applying action.", sessionState.getWmContext().getQueryId(), + LOG.info("Query: {}. {}. Applying action.", sessionState.getTriggerContext().getQueryId(), chosenTrigger.getViolationMsg()); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmEvent.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmEvent.java deleted file mode 100644 index 33341ad4a9..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmEvent.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.exec.tez; - -import org.apache.hadoop.hive.ql.wm.WmContext; -import org.codehaus.jackson.annotate.JsonProperty; -import org.codehaus.jackson.map.annotate.JsonSerialize; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Workload Manager events at query level. - */ -@JsonSerialize -public class WmEvent { - private static final Logger LOG = LoggerFactory.getLogger(WmEvent.class); - enum EventType { - GET, // get session - KILL, // kill query - DESTROY, // destroy session - RESTART, // restart session - RETURN, // return session back to pool - MOVE // move session to different pool - } - - // snapshot of subset of wm tez session info for printing in events summary - @JsonSerialize - public static class WmTezSessionInfo { - @JsonProperty("sessionId") - private final String sessionId; - @JsonProperty("poolName") - private final String poolName; - @JsonProperty("clusterPercent") - private final double clusterPercent; - - WmTezSessionInfo(WmTezSession wmTezSession) { - this.poolName = wmTezSession.getPoolName(); - this.sessionId = wmTezSession.getSessionId(); - this.clusterPercent = wmTezSession.getClusterFraction() * 100.0; - } - - public String getPoolName() { - return poolName; - } - - public String getSessionId() { - return sessionId; - } - - public double getClusterPercent() { - return clusterPercent; - } - - @Override - public String toString() { - return "SessionId: " + sessionId + " Pool: " + poolName + " Cluster %: " + clusterPercent; - } - } - - @JsonProperty("wmTezSessionInfo") - private WmTezSessionInfo wmTezSessionInfo; - @JsonProperty("eventStartTimestamp") - private long eventStartTimestamp; - @JsonProperty("eventEndTimestamp") - private long eventEndTimestamp; - @JsonProperty("eventType") - private final EventType eventType; - @JsonProperty("elapsedTime") - private long elapsedTime; - - WmEvent(final EventType eventType) { - this.eventType = eventType; - this.eventStartTimestamp = System.currentTimeMillis(); - } - - public long getEventStartTimestamp() { - return eventStartTimestamp; - } - - public EventType getEventType() { - return eventType; - } - - public WmTezSessionInfo getWmTezSessionInfo() { - return wmTezSessionInfo; - } - - public long getEventEndTimestamp() { - return eventEndTimestamp; - } - - public long getElapsedTime() { - return elapsedTime; - } - - public void endEvent(final WmTezSession sessionState) { - this.wmTezSessionInfo = new WmTezSessionInfo(sessionState); - this.eventEndTimestamp = System.currentTimeMillis(); - this.elapsedTime = eventEndTimestamp - eventStartTimestamp; - WmContext wmContext = sessionState.getWmContext(); - if (wmContext != null) { - wmContext.addWMEvent(this); - LOG.info("Added WMEvent: {}", this); - } - } - - @Override - public String toString() { - return "EventType: " + eventType + " EventStartTimestamp: " + eventStartTimestamp + " elapsedTime: " + - elapsedTime + " wmTezSessionInfo:" + wmTezSessionInfo; - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmTezSession.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmTezSession.java index e78ef44c11..d61c5315af 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmTezSession.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmTezSession.java @@ -21,40 +21,28 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.SettableFuture; - -import java.util.concurrent.Future; import 
java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.registry.impl.TezAmInstance; import org.apache.hive.common.util.Ref; -import org.codehaus.jackson.annotate.JsonIgnore; -import org.codehaus.jackson.annotate.JsonProperty; -import org.codehaus.jackson.map.annotate.JsonSerialize; -@JsonSerialize public class WmTezSession extends TezSessionPoolSession implements AmPluginNode { - @JsonProperty("poolName") private String poolName; - @JsonProperty("clusterFraction") private double clusterFraction; /** * The reason to kill an AM. Note that this is for the entire session, not just for a query. * Once set, this can never be unset because you can only kill the session once. */ - @JsonProperty("killReason") private String killReason = null; private final Object amPluginInfoLock = new Object(); - @JsonProperty("amPluginInfo") private AmPluginInfo amPluginInfo = null; - private Integer amPluginEndpointVersion = null; + private Integer amPluginendpointVersion = null; private SettableFuture amRegistryFuture = null; private ScheduledFuture timeoutTimer = null; - @JsonProperty("queryId") private String queryId; - private SettableFuture returnFuture = null; private final WorkloadManager wmParent; @@ -111,12 +99,12 @@ void updateFromRegistry(TezAmInstance si, int ephSeqVersion) { synchronized (amPluginInfoLock) { // Ignore the outdated updates; for the same version, ignore non-null updates because // we assume that removal is the last thing that happens for any given version. - if ((amPluginEndpointVersion != null) && ((amPluginEndpointVersion > ephSeqVersion) - || (amPluginEndpointVersion == ephSeqVersion && info != null))) { + if ((amPluginendpointVersion != null) && ((amPluginendpointVersion > ephSeqVersion) + || (amPluginendpointVersion == ephSeqVersion && info != null))) { LOG.info("Ignoring an outdated info update {}: {}", ephSeqVersion, si); return; } - this.amPluginEndpointVersion = ephSeqVersion; + this.amPluginendpointVersion = ephSeqVersion; this.amPluginInfo = info; if (info != null) { // Only update someone waiting for info if we have the info. 
@@ -135,7 +123,7 @@ void updateFromRegistry(TezAmInstance si, int ephSeqVersion) { @Override public AmPluginInfo getAmPluginInfo(Ref version) { synchronized (amPluginInfoLock) { - version.value = amPluginEndpointVersion; + version.value = amPluginendpointVersion; return amPluginInfo; } } @@ -144,7 +132,7 @@ void setPoolName(String poolName) { this.poolName = poolName; } - public String getPoolName() { + String getPoolName() { return poolName; } @@ -157,7 +145,7 @@ void clearWm() { this.clusterFraction = 0f; } - public double getClusterFraction() { + double getClusterFraction() { return this.clusterFraction; } @@ -247,20 +235,6 @@ public String getQueryId() { return this.queryId; } - void createAndSetReturnFuture() { - this.returnFuture = SettableFuture.create(); - if (getWmContext() != null) { - getWmContext().setReturnEventFuture(returnFuture); - } - } - - void resolveReturnFuture() { - if (returnFuture != null) { - returnFuture.set(true); - returnFuture = null; - } - } - @Override public String toString() { return super.toString() + ", WM state poolName=" + poolName + ", clusterFraction=" diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WorkloadManager.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WorkloadManager.java index dbdbbf25db..ecdcf12510 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WorkloadManager.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WorkloadManager.java @@ -17,8 +17,20 @@ */ package org.apache.hadoop.hive.ql.exec.tez; -import java.util.ArrayList; +import com.google.common.collect.Lists; + +import java.util.concurrent.ExecutionException; + import java.util.Collection; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Sets; +import com.google.common.util.concurrent.FutureCallback; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.SettableFuture; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -30,7 +42,6 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -39,7 +50,6 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; - import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; @@ -58,24 +68,11 @@ import org.apache.hadoop.hive.ql.wm.SessionTriggerProvider; import org.apache.hadoop.hive.ql.wm.Trigger; import org.apache.hadoop.hive.ql.wm.TriggerActionHandler; -import org.apache.hadoop.hive.ql.wm.WmContext; import org.apache.hive.common.util.Ref; import org.apache.tez.dag.api.TezConfiguration; -import org.codehaus.jackson.annotate.JsonAutoDetect; -import org.codehaus.jackson.map.ObjectMapper; -import org.codehaus.jackson.map.SerializationConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import 
com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.SettableFuture; -import com.google.common.util.concurrent.ThreadFactoryBuilder; - /** Workload management entry point for HS2. * Note on how this class operates. @@ -95,7 +92,6 @@ private static final char POOL_SEPARATOR = '.'; private static final String POOL_SEPARATOR_STR = "" + POOL_SEPARATOR; - private final ObjectMapper objectMapper; // Various final services, configs, etc. private final HiveConf conf; private final TezSessionPool tezAmPool; @@ -116,7 +112,6 @@ private Map pools; private String rpName, defaultPool; // For information only. private int totalQueryParallelism; - /** * The queries being killed. This is used to sync between the background kill finishing and the * query finishing and user returning the sessions, which can happen in separate iterations @@ -216,13 +211,6 @@ public static WorkloadManager create(String yarnQueue, HiveConf conf, WMFullReso wmThread.start(); updateResourcePlanAsync(plan).get(); // Wait for the initial resource plan to be applied. - - objectMapper = new ObjectMapper(); - objectMapper.configure(SerializationConfig.Feature.FAIL_ON_EMPTY_BEANS, false); - // serialize json based on field annotations only - objectMapper.setVisibilityChecker(objectMapper.getSerializationConfig().getDefaultVisibilityChecker() - .withGetterVisibility(JsonAutoDetect.Visibility.NONE) - .withSetterVisibility(JsonAutoDetect.Visibility.NONE)); } private static int determineQueryParallelism(WMFullResourcePlan plan) { @@ -405,13 +393,10 @@ private void scheduleWork(WmThreadSyncWork context) { KillQuery kq = toKill.getKillQuery(); try { if (kq != null && queryId != null) { - WmEvent wmEvent = new WmEvent(WmEvent.EventType.KILL); LOG.info("Invoking KillQuery for " + queryId + ": " + reason); try { kq.killQuery(queryId, reason); addKillQueryResult(toKill, true); - killCtx.killSessionFuture.set(true); - wmEvent.endEvent(toKill); LOG.debug("Killed " + queryId); return; } catch (HiveException ex) { @@ -438,10 +423,8 @@ private void scheduleWork(WmThreadSyncWork context) { toRestart.setQueryId(null); workPool.submit(() -> { try { - WmEvent wmEvent = new WmEvent(WmEvent.EventType.RESTART); // Note: sessions in toRestart are always in use, so they cannot expire in parallel. 
tezAmPool.replaceSession(toRestart, false, null); - wmEvent.endEvent(toRestart); } catch (Exception ex) { LOG.error("Failed to restart an old session; ignoring", ex); } @@ -454,9 +437,7 @@ private void scheduleWork(WmThreadSyncWork context) { LOG.info("Closing {} without restart", toDestroy); workPool.submit(() -> { try { - WmEvent wmEvent = new WmEvent(WmEvent.EventType.DESTROY); toDestroy.close(false); - wmEvent.endEvent(toDestroy); } catch (Exception ex) { LOG.error("Failed to close an old session; ignoring " + ex.getMessage()); } @@ -532,15 +513,9 @@ private void processCurrentEvents(EventState e, WmThreadSyncWork syncWork) throw e, sessionToReturn, poolsToRedistribute, true); switch (rr) { case OK: - WmEvent wmEvent = new WmEvent(WmEvent.EventType.RETURN); boolean wasReturned = tezAmPool.returnSessionAsync(sessionToReturn); if (!wasReturned) { syncWork.toDestroyNoRestart.add(sessionToReturn); - } else { - if (sessionToReturn.getWmContext() != null && sessionToReturn.getWmContext().isQueryCompleted()) { - sessionToReturn.resolveReturnFuture(); - } - wmEvent.endEvent(sessionToReturn); } break; case NOT_FOUND: @@ -588,9 +563,8 @@ private void processCurrentEvents(EventState e, WmThreadSyncWork syncWork) throw // We could consider delaying the move (when destination capacity is full) until there is claim in src pool. // May be change command to support ... DELAYED MOVE TO etl ... which will run under src cluster fraction as long // as possible - Map recordMoveEvents = new HashMap<>(); for (MoveSession moveSession : e.moveSessions) { - handleMoveSessionOnMasterThread(moveSession, syncWork, poolsToRedistribute, e.toReuse, recordMoveEvents); + handleMoveSessionOnMasterThread(moveSession, syncWork, poolsToRedistribute, e.toReuse); } e.moveSessions.clear(); @@ -616,21 +590,13 @@ private void processCurrentEvents(EventState e, WmThreadSyncWork syncWork) throw case OK: { iter.remove(); LOG.debug("Kill query succeeded; returning to the pool: {}", ctx.session); - ctx.killSessionFuture.set(true); - WmEvent wmEvent = new WmEvent(WmEvent.EventType.RETURN); if (!tezAmPool.returnSessionAsync(ctx.session)) { syncWork.toDestroyNoRestart.add(ctx.session); - } else { - if (ctx.session.getWmContext() != null && ctx.session.getWmContext().isQueryCompleted()) { - ctx.session.resolveReturnFuture(); - } - wmEvent.endEvent(ctx.session); } break; } case RESTART_REQUIRED: { iter.remove(); - ctx.killSessionFuture.set(true); LOG.debug("Kill query failed; restarting: {}", ctx.session); // Note: we assume here the session, before we resolve killQuery result here, is still // "in use". That is because all the user ops above like return, reopen, etc. @@ -654,7 +620,7 @@ private void processCurrentEvents(EventState e, WmThreadSyncWork syncWork) throw if (LOG.isDebugEnabled()) { LOG.info("Processing changes for pool " + poolName + ": " + pools.get(poolName)); } - processPoolChangesOnMasterThread(poolName, hasRequeues); + processPoolChangesOnMasterThread(poolName, syncWork, hasRequeues); } @@ -665,12 +631,7 @@ private void processCurrentEvents(EventState e, WmThreadSyncWork syncWork) throw } } - // 13. To record move events, we need to cluster fraction updates that happens at step 11. - for (Map.Entry entry : recordMoveEvents.entrySet()) { - entry.getValue().endEvent(entry.getKey()); - } - - // 14. Notify tests and global async ops. + // 13. Notify tests and global async ops. 
if (e.dumpStateFuture != null) { List result = new ArrayList<>(); result.add("RESOURCE PLAN " + rpName + "; default pool " + defaultPool); @@ -715,15 +676,11 @@ private void dumpPoolState(PoolState ps, List set) { } } - private void handleMoveSessionOnMasterThread(final MoveSession moveSession, - final WmThreadSyncWork syncWork, - final HashSet poolsToRedistribute, - final Map toReuse, - final Map recordMoveEvents) { + private void handleMoveSessionOnMasterThread(MoveSession moveSession, WmThreadSyncWork syncWork, + Set poolsToRedistribute, Map toReuse) { String destPoolName = moveSession.destPool; LOG.info("Handling move session event: {}", moveSession); if (validMove(moveSession.srcSession, destPoolName)) { - WmEvent moveEvent = new WmEvent(WmEvent.EventType.MOVE); // remove from src pool RemoveSessionResult rr = checkAndRemoveSessionFromItsPool( moveSession.srcSession, poolsToRedistribute, true); @@ -735,16 +692,15 @@ private void handleMoveSessionOnMasterThread(final MoveSession moveSession, moveSession.srcSession, destPoolName, poolsToRedistribute); if (added != null && added) { moveSession.future.set(true); - recordMoveEvents.put(moveSession.srcSession, moveEvent); return; } else { LOG.error("Failed to move session: {}. Session is not added to destination.", moveSession); } } else { WmTezSession session = moveSession.srcSession; - KillQueryContext killQueryContext = new KillQueryContext(session, "Destination pool " + destPoolName + - " is full. Killing query."); - resetAndQueueKill(syncWork.toKillQuery, killQueryContext, toReuse); + resetRemovedSessionToKill(session, toReuse); + syncWork.toKillQuery.put(session, new KillQueryContext(session, "Destination pool " + + destPoolName + " is full. Killing query.")); } } else { LOG.error("Failed to move session: {}. Session is not removed from its pool.", moveSession); @@ -852,7 +808,7 @@ private void handeReopenRequestOnMasterThread(EventState e, WmTezSession session case OK: // If pool didn't exist, checkAndRemoveSessionFromItsPool wouldn't have returned OK. PoolState pool = pools.get(poolName); - SessionInitContext sw = new SessionInitContext(future, poolName, session.getQueryId(), session.getWmContext()); + SessionInitContext sw = new SessionInitContext(future, poolName, session.getQueryId()); // We have just removed the session from the same pool, so don't check concurrency here. pool.initializingSessions.add(sw); ListenableFuture getFuture = tezAmPool.getSessionAsync(); @@ -962,14 +918,6 @@ private void applyNewResourcePlanOnMasterThread( totalQueryParallelism += qp; } } - // TODO: in the current impl, triggers are added to RP. For tez, no pool triggers (mapping between trigger name and - // pool name) will exist which means all triggers applies to tez. For LLAP, pool triggers has to exist for attaching - // triggers to specific pools. 
- // For usability, - // Provide a way for triggers sharing/inheritance possibly with following modes - // ONLY - only to pool - // INHERIT - child pools inherit from parent - // GLOBAL - all pools inherit if (e.resourcePlanToApply.isSetTriggers() && e.resourcePlanToApply.isSetPoolTriggers()) { Map triggers = new HashMap<>(); for (WMTrigger trigger : e.resourcePlanToApply.getTriggers()) { @@ -1088,7 +1036,8 @@ private void queueGetRequestOnMasterThread( } - private void processPoolChangesOnMasterThread(String poolName, boolean hasRequeues) throws Exception { + private void processPoolChangesOnMasterThread( + String poolName, WmThreadSyncWork context, boolean hasRequeues) throws Exception { PoolState pool = pools.get(poolName); if (pool == null) return; // Might be from before the new resource plan. @@ -1109,8 +1058,7 @@ private void processPoolChangesOnMasterThread(String poolName, boolean hasRequeu // Note that in theory, we are guaranteed to have a session waiting for us here, but // the expiration, failures, etc. may cause one to be missing pending restart. // See SessionInitContext javadoc. - SessionInitContext sw = new SessionInitContext(queueReq.future, poolName, queueReq.queryId, - queueReq.wmContext); + SessionInitContext sw = new SessionInitContext(queueReq.future, poolName, queueReq.queryId); ListenableFuture getFuture = tezAmPool.getSessionAsync(); Futures.addCallback(getFuture, sw); // It is possible that all the async methods returned on the same thread because the @@ -1149,14 +1097,8 @@ private void returnSessionOnFailedReuse( assert isOk || rr == RemoveSessionResult.IGNORE; if (!isOk) return; } - WmEvent wmEvent = new WmEvent(WmEvent.EventType.RETURN); if (!tezAmPool.returnSessionAsync(session)) { syncWork.toDestroyNoRestart.add(session); - } else { - if (session.getWmContext() != null && session.getWmContext().isQueryCompleted()) { - session.resolveReturnFuture(); - } - wmEvent.endEvent(session); } } @@ -1217,11 +1159,10 @@ private Boolean checkAndAddSessionToAnotherPool( PoolState destPool = pools.get(destPoolName); if (destPool != null && destPool.sessions.add(session)) { session.setPoolName(destPoolName); - updateTriggers(session); poolsToRedistribute.add(destPoolName); return true; } - LOG.error("Session {} was not added to pool {}", session, destPoolName); + LOG.error("Session {} was not not added to pool {}", session, destPoolName); return null; } @@ -1247,7 +1188,7 @@ private Boolean checkAndAddSessionToAnotherPool( return applyRpFuture; } - Future applyMoveSessionAsync(WmTezSession srcSession, String destPoolName) { + public Future applyMoveSessionAsync(WmTezSession srcSession, String destPoolName) { currentLock.lock(); MoveSession moveSession; try { @@ -1261,42 +1202,28 @@ private Boolean checkAndAddSessionToAnotherPool( return moveSession.future; } - Future applyKillSessionAsync(WmTezSession wmTezSession, String killReason) { - KillQueryContext killQueryContext; - currentLock.lock(); - try { - killQueryContext = new KillQueryContext(wmTezSession, killReason); - resetAndQueueKill(syncWork.toKillQuery, killQueryContext, current.toReuse); - LOG.info("Queued session for kill: {}", killQueryContext.session); - notifyWmThreadUnderLock(); - } finally { - currentLock.unlock(); - } - return killQueryContext.killSessionFuture; - } - private final static class GetRequest { - public static final Comparator ORDER_COMPARATOR = (o1, o2) -> { - if (o1.order == o2.order) return 0; - return o1.order < o2.order ? 
-1 : 1; + public static final Comparator ORDER_COMPARATOR = new Comparator() { + @Override + public int compare(GetRequest o1, GetRequest o2) { + if (o1.order == o2.order) return 0; + return o1.order < o2.order ? -1 : 1; + } }; private final long order; private final MappingInput mappingInput; private final SettableFuture future; private WmTezSession sessionToReuse; private final String queryId; - private final WmContext wmContext; private GetRequest(MappingInput mappingInput, String queryId, - SettableFuture future, WmTezSession sessionToReuse, long order, - final WmContext wmContext) { + SettableFuture future, WmTezSession sessionToReuse, long order) { assert mappingInput != null; this.mappingInput = mappingInput; this.queryId = queryId; this.future = future; this.sessionToReuse = sessionToReuse; this.order = order; - this.wmContext = wmContext; } @Override @@ -1305,16 +1232,15 @@ public String toString() { } } - public WmTezSession getSession( - TezSessionState session, MappingInput input, HiveConf conf, final WmContext wmContext) throws Exception { - WmEvent wmEvent = new WmEvent(WmEvent.EventType.GET); + public TezSessionState getSession( + TezSessionState session, MappingInput input, HiveConf conf) throws Exception { // Note: not actually used for pool sessions; verify some things like doAs are not set. validateConfig(conf); String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID); SettableFuture future = SettableFuture.create(); WmTezSession wmSession = checkSessionForReuse(session); GetRequest req = new GetRequest( - input, queryId, future, wmSession, getRequestVersion.incrementAndGet(), wmContext); + input, queryId, future, wmSession, getRequestVersion.incrementAndGet()); currentLock.lock(); try { current.getRequests.add(req); @@ -1326,9 +1252,7 @@ public WmTezSession getSession( } finally { currentLock.unlock(); } - WmTezSession sessionState = future.get(); - wmEvent.endEvent(sessionState); - return sessionState; + return future.get(); } @Override @@ -1359,7 +1283,6 @@ public void returnAfterUse(TezSessionPoolSession session) throws Exception { resetGlobalTezSession(wmTezSession); currentLock.lock(); try { - wmTezSession.createAndSetReturnFuture(); current.toReturn.add(wmTezSession); notifyWmThreadUnderLock(); } finally { @@ -1465,6 +1388,7 @@ public TezSessionState reopen(TezSessionState session, Configuration conf, if (sessionConf == null) { LOG.warn("Session configuration is null for " + wmTezSession); sessionConf = new HiveConf(conf, WorkloadManager.class); + } // TODO: ideally, we should handle reopen the same way no matter what. 
However, the cases // with additional files will have to wait until HIVE-17827 is unfucked, because it's @@ -1595,14 +1519,23 @@ protected final HiveConf getConf() { return conf; } - void updateTriggers(final WmTezSession session) { - WmContext wmContext = session.getWmContext(); - String poolName = session.getPoolName(); - PoolState poolState = pools.get(poolName); - if (wmContext != null && poolState != null) { - wmContext.addTriggers(poolState.getTriggers()); - LOG.info("Subscribed to counters: {}", wmContext.getSubscribedCounters()); + public List getTriggerCounterNames(final TezSessionState session) { + if (session instanceof WmTezSession) { + WmTezSession wmTezSession = (WmTezSession) session; + String poolName = wmTezSession.getPoolName(); + PoolState poolState = pools.get(poolName); + if (poolState != null) { + List counterNames = new ArrayList<>(); + List triggers = poolState.getTriggers(); + if (triggers != null) { + for (Trigger trigger : triggers) { + counterNames.add(trigger.getExpression().getCounterLimit().getName()); + } + } + return counterNames; + } } + return null; } @Override @@ -1680,10 +1613,6 @@ public double updateAllocationPercentages() { return sessions; } - public LinkedList getInitializingSessions() { - return initializingSessions; - } - @Override public String toString() { return "[" + fullName + ", query parallelism " + queryParallelism @@ -1696,7 +1625,8 @@ private void extractAllSessionsToKill(String killReason, IdentityHashMap toReuse, Map toKill) { for (WmTezSession sessionToKill : sessions) { - resetRemovedSessionToKill(toKill, new KillQueryContext(sessionToKill, killReason), toReuse); + resetRemovedSessionToKill(sessionToKill, toReuse); + toKill.put(sessionToKill, new KillQueryContext(sessionToKill, killReason)); } sessions.clear(); for (SessionInitContext initCtx : initializingSessions) { @@ -1706,7 +1636,8 @@ private void extractAllSessionsToKill(String killReason, if (sessionToKill == null) { continue; // Async op in progress; the callback will take care of this. } - resetRemovedSessionToKill(toKill, new KillQueryContext(sessionToKill, killReason), toReuse); + resetRemovedSessionToKill(sessionToKill, toReuse); + toKill.put(sessionToKill, new KillQueryContext(sessionToKill, killReason)); } initializingSessions.clear(); } @@ -1740,15 +1671,12 @@ public void setTriggers(final LinkedList triggers) { private SettableFuture future; private SessionInitState state; private String cancelReason; - private WmContext wmContext; - public SessionInitContext(SettableFuture future, String poolName, String queryId, - final WmContext wmContext) { + public SessionInitContext(SettableFuture future, String poolName, String queryId) { this.state = SessionInitState.GETTING; this.future = future; this.poolName = poolName; this.queryId = queryId; - this.wmContext = wmContext; } @Override @@ -1765,7 +1693,6 @@ public void onSuccess(WmTezSession session) { session.setPoolName(poolName); session.setQueueName(yarnQueue); session.setQueryId(queryId); - session.setWmContext(wmContext); this.session = session; this.state = SessionInitState.WAITING_FOR_REGISTRY; break; @@ -1813,7 +1740,6 @@ public void onSuccess(WmTezSession session) { session.setPoolName(null); session.setClusterFraction(0f); session.setQueryId(null); - session.setWmContext(null); tezAmPool.returnSession(session); break; } @@ -1932,18 +1858,16 @@ boolean isManaged(MappingInput input) { * like the session even before we kill it, or the kill fails and the user is happily computing * away. 
This class is to collect and make sense of the state around all this. */ - static final class KillQueryContext { - private SettableFuture killSessionFuture; + private static final class KillQueryContext { private final String reason; private final WmTezSession session; // Note: all the fields are only modified by master thread. private boolean isUserDone = false, isKillDone = false, hasKillFailed = false, hasUserFailed = false; - KillQueryContext(WmTezSession session, String reason) { + public KillQueryContext(WmTezSession session, String reason) { this.session = session; this.reason = reason; - this.killSessionFuture = SettableFuture.create(); } private void handleKillQueryCallback(boolean hasFailed) { @@ -1988,36 +1912,10 @@ public String toString() { } } - private static void resetRemovedSessionToKill(Map toKillQuery, - KillQueryContext killQueryContext, Map toReuse) { - toKillQuery.put(killQueryContext.session, killQueryContext); - killQueryContext.session.clearWm(); - GetRequest req = toReuse.remove(killQueryContext.session); - if (req != null) { - req.sessionToReuse = null; - } - } - - private void resetAndQueueKill(Map toKillQuery, - KillQueryContext killQueryContext, Map toReuse) { - - WmTezSession toKill = killQueryContext.session; - toKillQuery.put(toKill, killQueryContext); - - // The way this works is, a session in WM pool will move back to tez AM pool on a kill and will get - // reassigned back to WM pool on GetRequest based on user pool mapping. Only if we remove the session from active - // sessions list of its WM pool will the queue'd GetRequest be processed - String poolName = toKill.getPoolName(); - if (poolName != null) { - PoolState poolState = pools.get(poolName); - if (poolState != null) { - poolState.getSessions().remove(toKill); - poolState.getInitializingSessions().remove(toKill); - } - } - - toKill.clearWm(); - GetRequest req = toReuse.remove(toKill); + private static void resetRemovedSessionToKill( + WmTezSession sessionToKill, Map toReuse) { + sessionToKill.clearWm(); + GetRequest req = toReuse.remove(sessionToKill); if (req != null) { req.sessionToReuse = null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WorkloadManagerFederation.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WorkloadManagerFederation.java index 9d56204240..0a9fa72eaf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WorkloadManagerFederation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WorkloadManagerFederation.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -16,57 +16,51 @@ * limitations under the License. 
*/package org.apache.hadoop.hive.ql.exec.tez; -import java.util.HashSet; -import java.util.Set; +import org.slf4j.LoggerFactory; + +import org.slf4j.Logger; +import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.tez.UserPoolMapping.MappingInput; -import org.apache.hadoop.hive.ql.wm.WmContext; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class WorkloadManagerFederation { private static final Logger LOG = LoggerFactory.getLogger(WorkloadManagerFederation.class); public static TezSessionState getSession(TezSessionState session, HiveConf conf, - MappingInput input, boolean isUnmanagedLlapMode, final WmContext wmContext) throws Exception { - Set desiredCounters = new HashSet<>(); + MappingInput input, boolean isUnmanagedLlapMode, Set desiredCounters) throws Exception { // 1. If WM is not present just go to unmanaged. WorkloadManager wm = WorkloadManager.getInstance(); if (wm == null) { LOG.debug("Using unmanaged session - WM is not initialized"); - return getUnmanagedSession(session, conf, desiredCounters, isUnmanagedLlapMode, wmContext); + return getUnmanagedSession(session, conf, desiredCounters, isUnmanagedLlapMode); } // 2. We will ask WM for a preliminary mapping. This allows us to escape to the unmanaged path // quickly in the common case. It's still possible that resource plan will be updated and // our preliminary mapping won't work out. We'll handle that below. if (!wm.isManaged(input)) { LOG.info("Using unmanaged session - no mapping for " + input); - return getUnmanagedSession(session, conf, desiredCounters, isUnmanagedLlapMode, wmContext); + return getUnmanagedSession(session, conf, desiredCounters, isUnmanagedLlapMode); } // 3. Finally, try WM. try { // Note: this may just block to wait for a session based on parallelism. LOG.info("Getting a WM session for " + input); - WmTezSession result = wm.getSession(session, input, conf, wmContext); - result.setWmContext(wmContext); - wm.updateTriggers(result); + TezSessionState result = wm.getSession(session, input, conf); + desiredCounters.addAll(wm.getTriggerCounterNames(result)); return result; } catch (WorkloadManager.NoPoolMappingException ex) { LOG.info("NoPoolMappingException thrown. 
Getting an un-managed session.."); - return getUnmanagedSession(session, conf, desiredCounters, isUnmanagedLlapMode, wmContext); + return getUnmanagedSession(session, conf, desiredCounters, isUnmanagedLlapMode); } } private static TezSessionState getUnmanagedSession( - TezSessionState session, HiveConf conf, Set desiredCounters, boolean isWorkLlapNode, - final WmContext wmContext) throws Exception { + TezSessionState session, HiveConf conf, Set desiredCounters, boolean isWorkLlapNode) throws Exception { TezSessionPoolManager pm = TezSessionPoolManager.getInstance(); session = pm.getSession(session, conf, false, isWorkLlapNode); desiredCounters.addAll(pm.getTriggerCounterNames()); - wmContext.setSubscribedCounters(desiredCounters); - session.setWmContext(wmContext); return session; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/PrintSummary.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/PrintSummary.java index 8414c73e2b..5bb6bf1c41 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/PrintSummary.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/PrintSummary.java @@ -19,6 +19,6 @@ import org.apache.hadoop.hive.ql.session.SessionState; -public interface PrintSummary { +interface PrintSummary { void print(SessionState.LogHelper console); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java index 9726af1506..3dd4b31186 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java @@ -43,7 +43,7 @@ Licensed to the Apache Software Foundation (ASF) under one import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.ql.wm.TimeCounterLimit; -import org.apache.hadoop.hive.ql.wm.WmContext; +import org.apache.hadoop.hive.ql.wm.TriggerContext; import org.apache.hadoop.hive.ql.wm.VertexCounterLimit; import org.apache.hive.common.util.ShutdownHookManager; import org.apache.tez.common.counters.CounterGroup; @@ -156,7 +156,7 @@ public int monitorExecution() { boolean running = false; long checkInterval = MIN_CHECK_INTERVAL; - WmContext wmContext = null; + TriggerContext triggerContext = null; while (true) { try { @@ -167,12 +167,12 @@ public int monitorExecution() { status = dagClient.getDAGStatus(EnumSet.of(StatusGetOpts.GET_COUNTERS), checkInterval); TezCounters dagCounters = status.getDAGCounters(); vertexProgressMap = status.getVertexProgress(); - wmContext = context.getWmContext(); - if (dagCounters != null && wmContext != null) { - Set desiredCounters = wmContext.getSubscribedCounters(); + triggerContext = context.getTriggerContext(); + if (dagCounters != null && triggerContext != null) { + Set desiredCounters = triggerContext.getDesiredCounters(); if (desiredCounters != null && !desiredCounters.isEmpty()) { Map currentCounters = getCounterValues(dagCounters, vertexProgressMap, desiredCounters, done); - wmContext.setCurrentCounters(currentCounters); + triggerContext.setCurrentCounters(currentCounters); } } DAGStatus.State state = status.getState(); @@ -234,8 +234,8 @@ public int monitorExecution() { break; } } - if (wmContext != null && done) { - wmContext.setQueryCompleted(true); + if (triggerContext != null && done) { + triggerContext.setQueryCompleted(true); } } catch (Exception e) { 
console.printInfo("Exception: " + e.getMessage()); @@ -263,13 +263,13 @@ public int monitorExecution() { } else { console.printInfo("Retrying..."); } - if (wmContext != null && done) { - wmContext.setQueryCompleted(true); + if (triggerContext != null && done) { + triggerContext.setQueryCompleted(true); } } finally { if (done) { - if (wmContext != null && done) { - wmContext.setQueryCompleted(true); + if (triggerContext != null && done) { + triggerContext.setQueryCompleted(true); } if (rc != 0 && status != null) { for (String diag : status.getDiagnostics()) { @@ -324,7 +324,7 @@ public int monitorExecution() { if (!done) { counterName = TimeCounterLimit.TimeCounter.ELAPSED_TIME.name(); if (desiredCounters.contains(counterName)) { - updatedCounters.put(counterName, context.getWmContext().getElapsedTime()); + updatedCounters.put(counterName, context.getTriggerContext().getElapsedTime()); } counterName = TimeCounterLimit.TimeCounter.EXECUTION_TIME.name(); @@ -351,7 +351,6 @@ private void printSummary(boolean success, Map progressMap) { new LLAPioSummary(progressMap, dagClient).print(console); new FSCountersSummary(progressMap, dagClient).print(console); } - console.printInfo(""); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 0ad68166ae..5c7d7eec8a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -52,11 +52,47 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType; import org.apache.hadoop.hive.ql.exec.vector.expressions.*; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFBloomFilter; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFBloomFilterMerge; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountMerge; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal64ToDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimalComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimalFinal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimalPartial2; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDoubleComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgFinal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLongComplete; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgPartial2; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgTimestampComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPartial2; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -64,6 +100,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.*; import org.apache.hadoop.hive.ql.udf.generic.*; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.io.DateWritable; @@ -2697,11 +2734,24 @@ private VectorExpression getCastToBoolean(List childExpr) } // Long and double are handled using descriptors, string needs to be specially handled. 
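The change below drops the dedicated CastStringToBoolean vector expression and instead composes StringLength with CastLongToBooleanViaLongToLong, so a string casts to boolean purely by its length. A minimal scalar sketch of that semantics, illustrative only (the helper name is made up, not part of the patch):

    // Length-based cast semantics after this change: NULL stays NULL, the empty
    // string is false, and any non-empty string (including "0" and "false") is true.
    static Boolean castStringToBoolean(String s) {
      if (s == null) {
        return null;
      }
      return s.length() != 0;
    }

This mirrors the scalar UDFToBoolean.evaluate(Text) change further down, which now sets the result from i.getLength() != 0; in the vectorized path the hunk wires StringLength's long output column into CastLongToBooleanViaLongToLong to obtain the same result.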
if (isStringFamily(inputType)) { + // string casts to false if it is 0 characters long, otherwise true + VectorExpression lenExpr = createVectorExpression(StringLength.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.longTypeInfo); - VectorExpression lenExpr = createVectorExpression(CastStringToBoolean.class, childExpr, - VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo); + int outputColumnNum = ocm.allocateOutputColumn(TypeInfoFactory.booleanTypeInfo); + VectorExpression lenToBoolExpr = + new CastLongToBooleanViaLongToLong(lenExpr.getOutputColumnNum(), outputColumnNum); - return lenExpr; + lenToBoolExpr.setChildExpressions(new VectorExpression[] {lenExpr}); + + lenToBoolExpr.setInputTypeInfos(lenExpr.getOutputTypeInfo()); + lenToBoolExpr.setInputDataTypePhysicalVariations(lenExpr.getOutputDataTypePhysicalVariation()); + + lenToBoolExpr.setOutputTypeInfo(TypeInfoFactory.booleanTypeInfo); + lenToBoolExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + + ocm.freeOutputColumn(lenExpr.getOutputColumnNum()); + return lenToBoolExpr; } return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java deleted file mode 100644 index 7a44035337..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; - -/** - * Type cast string to boolean. - */ -public class CastStringToBoolean extends FuncStringToLong { - private static final long serialVersionUID = 1L; - - public CastStringToBoolean() { - super(); - } - - public CastStringToBoolean(int inputColumn, int outputColumn) { - super(inputColumn, outputColumn); - } - - @Override - protected void func(LongColumnVector outV, BytesColumnVector inV, int offset) { - - int start = inV.start[offset]; - int length = inV.length[offset]; - byte[] s = inV.vector[offset]; - boolean b = PrimitiveObjectInspectorUtils.parseBoolean(s, start, length); - outV.vector[offset] = b ? 
1 : 0; - - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java deleted file mode 100644 index 5c0a7fae56..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java +++ /dev/null @@ -1,145 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; - -/** - * Superclass to support vectorized functions that take a long - * and return a string, optionally with additional configuration arguments. - * Used for cast(string), length(string), etc - */ -public abstract class FuncStringToLong extends VectorExpression { - private static final long serialVersionUID = 1L; - - private int inputCol; - private int outputCol; - - public FuncStringToLong(int inputCol, int outputCol) { - super(outputCol); - this.inputCol = inputCol; - this.outputCol = outputCol; - } - - public FuncStringToLong() { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - BytesColumnVector inV = (BytesColumnVector) batch.cols[inputCol]; - int[] sel = batch.selected; - int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputCol]; - - if (n == 0) { - //Nothing to do - return; - } - - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - func(outV, inV, 0); - } else if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - func(outV, inV, i); - } - outV.isRepeating = false; - } else { - for (int i = 0; i != n; i++) { - func(outV, inV, i); - } - outV.isRepeating = false; - } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. 
- outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - func(outV, inV, i); - } - } - outV.isRepeating = false; - } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); - for (int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - func(outV, inV, i); - } - } - outV.isRepeating = false; - } - } - } - - /* Evaluate result for position i (using bytes[] to avoid storage allocation costs) - * and set position i of the output vector to the result. - */ - protected abstract void func(LongColumnVector outV, BytesColumnVector inV, int i); - - public int getOutputCol() { - return outputCol; - } - - public void setOutputCol(int outputCol) { - this.outputCol = outputCol; - } - - public int getInputCol() { - return inputCol; - } - - public void setInputCol(int inputCol) { - this.inputCol = inputCol; - } - - @Override - public String vectorExpressionParameters() { - return "col " + inputCol; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); - b.setMode(VectorExpressionDescriptor.Mode.PROJECTION).setNumArguments(1) - .setArgumentTypes(VectorExpressionDescriptor.ArgumentType.STRING_FAMILY) - .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN); - return b.build(); - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index 8c2894b482..b5399d6ccf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -21,8 +21,13 @@ import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Map; + +import com.google.common.collect.ImmutableMap; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -30,6 +35,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.metadata.HiveException; /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecWMEventsSummaryPrinter.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecWMEventsSummaryPrinter.java deleted file mode 100644 index 83cca8903b..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecWMEventsSummaryPrinter.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.hooks; - -import java.util.List; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.QueryPlan; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.exec.tez.TezTask; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.wm.WmContext; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Post execution (success or failure) hook to print hive workload manager events summary. - */ -public class PostExecWMEventsSummaryPrinter implements ExecuteWithHookContext { - private static final Logger LOG = LoggerFactory.getLogger(PostExecWMEventsSummaryPrinter.class.getName()); - - @Override - public void run(HookContext hookContext) throws Exception { - assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK || - hookContext.getHookType() == HookContext.HookType.ON_FAILURE_HOOK); - HiveConf conf = hookContext.getConf(); - if (!"tez".equals(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE))) { - return; - } - - LOG.info("Executing post execution hook to print workload manager events summary.."); - SessionState.LogHelper console = SessionState.getConsole(); - QueryPlan plan = hookContext.getQueryPlan(); - if (plan == null) { - return; - } - - List rootTasks = Utilities.getTezTasks(plan.getRootTasks()); - for (TezTask tezTask : rootTasks) { - WmContext wmContext = tezTask.getDriverContext().getCtx().getWmContext(); - if (wmContext != null) { - wmContext.shortPrint(console); - } - } - } - -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java index d1fe49c875..98d140fc8b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -104,7 +104,6 @@ import org.apache.calcite.util.Stacks; import org.apache.calcite.util.Util; import org.apache.calcite.util.mapping.Mappings; -import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttleImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; @@ -1131,7 +1130,8 @@ private Frame decorrelateInputWithValueGenerator(RelNode rel) { // we need to keep predicate kind e.g. EQUAL or NOT EQUAL // so that later while decorrelating LogicalCorrelate appropriate join predicate // is generated - def.setPredicateKind((SqlKind)((Pair)e.getNode()).getValue()); + def.setPredicateKind((SqlOperator) ((Pair)((Pair)e.getNode()).getValue()).getKey()); + def.setIsLeft((boolean)((Pair)((Pair) e.getNode()).getValue()).getValue()); map.put(def, (Integer)((Pair) e.getNode()).getKey()); } } @@ -1170,30 +1170,39 @@ private Frame decorrelateInputWithValueGenerator(RelNode rel) { * and if found, throws a {@link Util.FoundOne}. 
*/ private void findCorrelationEquivalent(CorRef correlation, RexNode e) throws Util.FoundOne { - switch (e.getKind()) { - // TODO: for now only EQUAL and NOT EQUAL corr predicates are optimized - //optimize rest of the predicates - case NOT_EQUALS: - if((boolean)valueGen.peek()) { - // we will need value generator - break; - } - case EQUALS: - final RexCall call = (RexCall) e; - final List operands = call.getOperands(); - if (references(operands.get(0), correlation) - && operands.get(1) instanceof RexInputRef) { - throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(1)).getIndex(), e.getKind())); - } - if (references(operands.get(1), correlation) - && operands.get(0) instanceof RexInputRef) { - throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(0)).getIndex(), e.getKind())); - } - break; + if(e instanceof RexCall){ + switch (e.getKind()) { case AND: for (RexNode operand : ((RexCall) e).getOperands()) { findCorrelationEquivalent(correlation, operand); } + default: + final RexCall call = (RexCall) e; + final List operands = call.getOperands(); + if(operands.size() == 2) { + if (references(operands.get(0), correlation) + && operands.get(1) instanceof RexInputRef) { + // if call isn't EQUAL type and it has been determined that value generate might be + // required we should rather generate value generator + if(e.getKind() != SqlKind.EQUALS && (boolean)valueGen.peek()) { + return; + } + throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(1)).getIndex(), + Pair.of(((RexCall) e).getOperator(), true))); + } + if (references(operands.get(1), correlation) + && operands.get(0) instanceof RexInputRef) { + // if call isn't EQUAL type and it has been determined that value generate might be + // required we should rather generate value generator + if(e.getKind() != SqlKind.EQUALS && (boolean)valueGen.peek()) { + return; + } + throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(0)).getIndex(), + Pair.of(((RexCall) e).getOperator(), false))); + } + break; + } + } } } @@ -1426,23 +1435,21 @@ public Frame decorrelateRel(LogicalCorrelate rel) { } final int newLeftPos = leftFrame.oldToNewOutputs.get(corDef.field); final int newRightPos = rightOutput.getValue(); - if(corDef.getPredicateKind() == SqlKind.NOT_EQUALS) { + SqlOperator callOp = corDef.getPredicateKind() == null ? 
+ SqlStdOperatorTable.EQUALS: corDef.getPredicateKind(); + if(corDef.isLeft) { conditions.add( - rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, + rexBuilder.makeCall(callOp, RexInputRef.of(newLeftPos, newLeftOutput), new RexInputRef(newLeftFieldCount + newRightPos, newRightOutput.get(newRightPos).getType()))); - } else { - assert(corDef.getPredicateKind() == null - || corDef.getPredicateKind() == SqlKind.EQUALS); conditions.add( - rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, - RexInputRef.of(newLeftPos, newLeftOutput), + rexBuilder.makeCall(callOp, new RexInputRef(newLeftFieldCount + newRightPos, - newRightOutput.get(newRightPos).getType()))); - + newRightOutput.get(newRightPos).getType()), + RexInputRef.of(newLeftPos, newLeftOutput))); } // remove this cor var from output position mapping @@ -1921,9 +1928,7 @@ public void setValueGenerator(boolean valueGenerator) { // there is support of not equal @Override public RexNode visitCall(final RexCall call) { if(!valueGenerator) { - switch (call.getKind()) { - case EQUALS: - case NOT_EQUALS: + if(call.getOperands().size() == 2) { final List operands = new ArrayList<>(call.operands); RexNode o0 = operands.get(0); RexNode o1 = operands.get(1); @@ -3018,12 +3023,18 @@ public CorDef def() { static class CorDef implements Comparable { public final CorrelationId corr; public final int field; - private SqlKind predicateKind; + + private SqlOperator predicateKind; + // this indicates if corr var is left operand of rex call or not + // this is used in decorrelate(logical correlate) to appropriately + // create Rex node expression + private boolean isLeft; CorDef(CorrelationId corr, int field) { this.corr = corr; this.field = field; this.predicateKind = null; + this.isLeft=false; } @Override public String toString() { @@ -3048,13 +3059,24 @@ public int compareTo(@Nonnull CorDef o) { } return Integer.compare(field, o.field); } - public SqlKind getPredicateKind() { + + public SqlOperator getPredicateKind() { return predicateKind; } - public void setPredicateKind(SqlKind predKind) { + + public void setPredicateKind(SqlOperator predKind) { this.predicateKind = predKind; } + + public boolean getIsLeft() { + return this.isLeft; + } + + public void setIsLeft(boolean isLeft) { + this.isLeft = isLeft; + } + } /** A map of the locations of diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 76c82e2606..ba64f97105 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1355,7 +1355,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu RelNode calciteGenPlan = null; RelNode calcitePreCboPlan = null; RelNode calciteOptimizedPlan = null; - subqueryId = 0; + subqueryId = -1; /* * recreate cluster, so that it picks up the additional traitDef @@ -2582,104 +2582,106 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, private void subqueryRestrictionCheck(QB qb, ASTNode searchCond, RelNode srcRel, boolean forHavingClause, Set corrScalarQueries, Set scalarQueriesWithAggNoWinNoGby) throws SemanticException { - List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); - - ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); - List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); - for(int i=0; i subqInfo = new ObjectPair(false, 0); - - ASTNode outerQueryExpr = (ASTNode) 
subQueryAST.getChild(2); - - if (outerQueryExpr != null && outerQueryExpr.getType() == HiveParser.TOK_SUBQUERY_EXPR ) { - - throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - outerQueryExpr, "IN/NOT IN subqueries are not allowed in LHS")); - } + List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); + + ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); + List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); + for(int i=0; i subqInfo = new ObjectPair(false, 0); + + ASTNode outerQueryExpr = (ASTNode) subQueryAST.getChild(2); + + if (outerQueryExpr != null && outerQueryExpr.getType() == HiveParser.TOK_SUBQUERY_EXPR) { + + throw new CalciteSubquerySemanticException( + ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + outerQueryExpr, "IN/NOT IN subqueries are not allowed in LHS")); + } - QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, - originalSubQueryAST, ctx); + QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, + originalSubQueryAST, ctx); - RowResolver inputRR = relToHiveRR.get(srcRel); + RowResolver inputRR = relToHiveRR.get(srcRel); - String havingInputAlias = null; + String havingInputAlias = null; - boolean [] subqueryConfig = {false, false}; - subQuery.subqueryRestrictionsCheck(inputRR, forHavingClause, - havingInputAlias, subqueryConfig); - if(subqueryConfig[0]) { - corrScalarQueries.add(originalSubQueryAST); - } - if(subqueryConfig[1]) { - scalarQueriesWithAggNoWinNoGby.add(originalSubQueryAST); - } + boolean [] subqueryConfig = {false, false}; + subQuery.subqueryRestrictionsCheck(inputRR, forHavingClause, + havingInputAlias, subqueryConfig); + if(subqueryConfig[0]) { + corrScalarQueries.add(originalSubQueryAST); + } + if(subqueryConfig[1]) { + scalarQueriesWithAggNoWinNoGby.add(originalSubQueryAST); + } } } private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean forHavingClause, Map subQueryToRelNode) throws SemanticException { - Set corrScalarQueriesWithAgg = new HashSet(); - Set scalarQueriesWithAggNoWinNoGby= new HashSet(); - //disallow subqueries which HIVE doesn't currently support - subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, corrScalarQueriesWithAgg, - scalarQueriesWithAggNoWinNoGby); - Deque stack = new ArrayDeque(); - stack.push(node); + Set corrScalarQueriesWithAgg = new HashSet(); + Set scalarQueriesWithAggNoWinNoGby= new HashSet(); + //disallow subqueries which HIVE doesn't currently support + subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, corrScalarQueriesWithAgg, + scalarQueriesWithAggNoWinNoGby); + Deque stack = new ArrayDeque(); + stack.push(node); - boolean isSubQuery = false; + boolean isSubQuery = false; - while (!stack.isEmpty()) { - ASTNode next = stack.pop(); + while (!stack.isEmpty()) { + ASTNode next = stack.pop(); - switch(next.getType()) { - case HiveParser.TOK_SUBQUERY_EXPR: + switch(next.getType()) { + case HiveParser.TOK_SUBQUERY_EXPR: /* * Restriction 2.h Subquery isnot allowed in LHS */ - if(next.getChildren().size() == 3 - && next.getChild(2).getType() == HiveParser.TOK_SUBQUERY_EXPR){ - throw new CalciteSemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - next.getChild(2), - "SubQuery in LHS expressions are not supported.")); - } - String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates(); - QB qbSQ = new QB(qb.getId(), sbQueryAlias, true); - Phase1Ctx ctx1 = initPhase1Ctx(); - 
doPhase1((ASTNode)next.getChild(1), qbSQ, ctx1, null); - getMetaData(qbSQ); - RelNode subQueryRelNode = genLogicalPlan(qbSQ, false, relToHiveColNameCalcitePosMap.get(srcRel), - relToHiveRR.get(srcRel)); - subQueryToRelNode.put(next, subQueryRelNode); - //keep track of subqueries which are scalar, correlated and contains aggregate - // subquery expression. This will later be special cased in Subquery remove rule - // for correlated scalar queries with aggregate we have take care of the case where - // inner aggregate happens on empty result - if(corrScalarQueriesWithAgg.contains(next)) { - corrScalarRexSQWithAgg.add(subQueryRelNode); - } - if(scalarQueriesWithAggNoWinNoGby.contains(next)) { - scalarAggNoGbyNoWin.add(subQueryRelNode); - } - isSubQuery = true; - break; - default: - int childCount = next.getChildCount(); - for(int i = childCount - 1; i >= 0; i--) { - stack.push((ASTNode) next.getChild(i)); - } + if(next.getChildren().size() == 3 + && next.getChild(2).getType() == HiveParser.TOK_SUBQUERY_EXPR){ + throw new CalciteSemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + next.getChild(2), + "SubQuery in LHS expressions are not supported.")); + } + String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates(); + QB qbSQ = new QB(qb.getId(), sbQueryAlias, true); + Phase1Ctx ctx1 = initPhase1Ctx(); + doPhase1((ASTNode)next.getChild(1), qbSQ, ctx1, null); + getMetaData(qbSQ); + this.subqueryId++; + RelNode subQueryRelNode = genLogicalPlan(qbSQ, false, + relToHiveColNameCalcitePosMap.get(srcRel), relToHiveRR.get(srcRel)); + subQueryToRelNode.put(next, subQueryRelNode); + //keep track of subqueries which are scalar, correlated and contains aggregate + // subquery expression. This will later be special cased in Subquery remove rule + // for correlated scalar queries with aggregate we have take care of the case where + // inner aggregate happens on empty result + if(corrScalarQueriesWithAgg.contains(next)) { + corrScalarRexSQWithAgg.add(subQueryRelNode); } + if(scalarQueriesWithAggNoWinNoGby.contains(next)) { + scalarAggNoGbyNoWin.add(subQueryRelNode); + } + isSubQuery = true; + break; + default: + int childCount = next.getChildCount(); + for(int i = childCount - 1; i >= 0; i--) { + stack.push((ASTNode) next.getChild(i)); + } + } } return isSubQuery; } @@ -2706,7 +2708,6 @@ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, this.relToHiveColNameCalcitePosMap.put(filterRel, this.relToHiveColNameCalcitePosMap .get(srcRel)); relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); - this.subqueryId++; return filterRel; } else { return genFilterRelNode(searchCond, srcRel, outerNameToPosMap, outerRR, forHavingClause); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 07742e0485..f6bbac6a8f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12841,7 +12841,7 @@ public void processPositionAlias(ASTNode ast) throws SemanticException { for (int child_pos = 0; child_pos < child_count; ++child_pos) { ASTNode node = (ASTNode) next.getChild(child_pos); int type = node.getToken().getType(); - if (type == HiveParser.TOK_SELECT || type == HiveParser.TOK_SELECTDI) { + if (type == HiveParser.TOK_SELECT) { selectNode = node; } else if (type == HiveParser.TOK_GROUPBY) { groupbyNode = node; @@ -12877,49 +12877,7 @@ public void processPositionAlias(ASTNode ast) throws 
SemanticException { } } - // replace each of the position alias in ORDERBY with the actual column name, - // if cbo is enabled, orderby position will be processed in genPlan - if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED) - && orderbyNode != null) { - isAllCol = false; - for (int child_pos = 0; child_pos < selectNode.getChildCount(); ++child_pos) { - ASTNode node = (ASTNode) selectNode.getChild(child_pos).getChild(0); - if (node != null && node.getToken().getType() == HiveParser.TOK_ALLCOLREF) { - isAllCol = true; - } - } - for (int child_pos = 0; child_pos < orderbyNode.getChildCount(); ++child_pos) { - ASTNode colNode = null; - ASTNode node = null; - if (orderbyNode.getChildCount() > 0) { - colNode = (ASTNode) orderbyNode.getChild(child_pos).getChild(0); - if (colNode.getChildCount() > 0) { - node = (ASTNode) colNode.getChild(0); - } - } - if (node != null && node.getToken().getType() == HiveParser.Number) { - if (isObyByPos) { - if (!isAllCol) { - int pos = Integer.parseInt(node.getText()); - if (pos > 0 && pos <= selectExpCnt && selectNode.getChild(pos - 1).getChildCount() > 0) { - colNode.setChild(0, selectNode.getChild(pos - 1).getChild(0)); - } else { - throw new SemanticException( - ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg( - "Position alias: " + pos + " does not exist\n" + - "The Select List is indexed from 1 to " + selectExpCnt)); - } - } else { - throw new SemanticException( - ErrorMsg.NO_SUPPORTED_ORDERBY_ALLCOLREF_POS.getMsg()); - } - } else { //if not using position alias and it is a number. - warn("Using constant number " + node.getText() + - " in order by. If you try to use position alias when hive.orderby.position.alias is false, the position alias will be ignored."); - } - } - } - } + // orderby position will be processed in genPlan } for (int i = next.getChildren().size() - 1; i >= 0; i--) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java index 57949d90aa..d291e36598 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java @@ -23,7 +23,6 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToBoolean; -import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToBoolean; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToBooleanViaDoubleToLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong; @@ -34,8 +33,8 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -48,7 +47,7 @@ * */ @VectorizedExpressions({CastLongToBooleanViaLongToLong.class, - CastDateToBooleanViaLongToLong.class, CastTimestampToBoolean.class, CastStringToBoolean.class, + CastDateToBooleanViaLongToLong.class, CastTimestampToBoolean.class, 
CastDoubleToBooleanViaDoubleToLong.class, CastDecimalToBoolean.class, CastStringToLong.class}) public class UDFToBoolean extends UDF { private final BooleanWritable booleanWritable = new BooleanWritable(); @@ -173,10 +172,10 @@ public BooleanWritable evaluate(DoubleWritable i) { public BooleanWritable evaluate(Text i) { if (i == null) { return null; + } else { + booleanWritable.set(i.getLength() != 0); + return booleanWritable; } - boolean b = PrimitiveObjectInspectorUtils.parseBoolean(i.getBytes(), 0, i.getLength()); - booleanWritable.set(b); - return booleanWritable; } public BooleanWritable evaluate(DateWritable d) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/wm/Trigger.java b/ql/src/java/org/apache/hadoop/hive/ql/wm/Trigger.java index 4adad7a1b6..e41b460ab9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/wm/Trigger.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/wm/Trigger.java @@ -15,14 +15,11 @@ */ package org.apache.hadoop.hive.ql.wm; -import org.codehaus.jackson.map.annotate.JsonSerialize; - /** * Trigger interface which gets mapped to CREATE TRIGGER .. queries. A trigger can have a name, expression and action. * Trigger is a simple expression which gets evaluated during the lifecycle of query and executes an action * if the expression defined in trigger evaluates to true. */ -@JsonSerialize public interface Trigger { /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/wm/TriggerActionHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/wm/TriggerActionHandler.java index 7995a8f639..8b142dae94 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/wm/TriggerActionHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/wm/TriggerActionHandler.java @@ -22,11 +22,11 @@ /** * Interface for handling rule violations by queries and for performing actions defined in the rules. */ -public interface TriggerActionHandler { +public interface TriggerActionHandler { /** * Applies the action defined in the rule for the specified queries * * @param queriesViolated - violated queries and the rule it violated */ - void applyAction(Map queriesViolated); + void applyAction(Map queriesViolated); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/wm/TriggerContext.java b/ql/src/java/org/apache/hadoop/hive/ql/wm/TriggerContext.java new file mode 100644 index 0000000000..16072c31a0 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/wm/TriggerContext.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.wm; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Some context information that are required for rule evaluation. 
+ */ +public class TriggerContext { + private Set desiredCounters = new HashSet<>(); + private Map currentCounters = new HashMap<>(); + private String queryId; + private long queryStartTime; + private boolean queryCompleted; + + public TriggerContext(final long queryStartTime, final String queryId) { + this.queryStartTime = queryStartTime; + this.queryId = queryId; + this.queryCompleted = false; + } + + public String getQueryId() { + return queryId; + } + + public void setQueryId(final String queryId) { + this.queryId = queryId; + } + + public Set getDesiredCounters() { + return desiredCounters; + } + + public void setDesiredCounters(final Set desiredCounters) { + this.desiredCounters = desiredCounters; + } + + public Map getCurrentCounters() { + return currentCounters; + } + + public void setCurrentCounters(final Map currentCounters) { + this.currentCounters = currentCounters; + } + + public long getElapsedTime() { + return System.currentTimeMillis() - queryStartTime; + } + + public boolean isQueryCompleted() { + return queryCompleted; + } + + public void setQueryCompleted(final boolean queryCompleted) { + this.queryCompleted = queryCompleted; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/wm/WmContext.java b/ql/src/java/org/apache/hadoop/hive/ql/wm/WmContext.java deleted file mode 100644 index 7a7ef507e5..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/wm/WmContext.java +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.wm; - -import static org.apache.hadoop.hive.ql.exec.tez.monitoring.Constants.SEPARATOR; - -import java.text.DecimalFormat; -import java.time.Instant; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; - -import javax.management.MXBean; - -import org.apache.hadoop.hive.ql.exec.tez.WmEvent; -import org.apache.hadoop.hive.ql.exec.tez.monitoring.PrintSummary; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.codehaus.jackson.annotate.JsonAutoDetect; -import org.codehaus.jackson.annotate.JsonIgnore; -import org.codehaus.jackson.annotate.JsonProperty; -import org.codehaus.jackson.map.ObjectMapper; -import org.codehaus.jackson.map.SerializationConfig; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Some context information that are required for rule evaluation. 
- */ -@MXBean -public class WmContext implements PrintSummary { - private static final Logger LOG = LoggerFactory.getLogger(WmContext.class); - @JsonProperty("queryId") - private String queryId; - @JsonProperty("queryStartTime") - private long queryStartTime; - @JsonProperty("queryEndTime") - private long queryEndTime; - @JsonProperty("queryCompleted") - private boolean queryCompleted; - @JsonProperty("queryWmEvents") - private final List queryWmEvents = new LinkedList<>(); - @JsonProperty("appliedTriggers") - private Set appliedTriggers = new HashSet<>(); - @JsonProperty("subscribedCounters") - private Set subscribedCounters = new HashSet<>(); - @JsonProperty("currentCounters") - private Map currentCounters = new HashMap<>(); - @JsonIgnore // explictly ignoring as Getter visibility is ANY for auto-json serialization of Trigger based on getters - private Future returnEventFuture; - - public WmContext(final long queryStartTime, final String queryId) { - this.queryStartTime = queryStartTime; - this.queryId = queryId; - this.queryCompleted = false; - } - - public Set getAppliedTriggers() { - return appliedTriggers; - } - - public void addTriggers(final List triggers) { - if (triggers != null) { - this.appliedTriggers.addAll(triggers); - // reset and add counters. This can happen during start of query or a session being moved to another pool with its - // own set of triggers - Set counters = new HashSet<>(); - for (Trigger trigger : triggers) { - counters.add(trigger.getExpression().getCounterLimit().getName()); - } - setSubscribedCounters(counters); - setCurrentCounters(new HashMap<>()); - } - } - - public String getQueryId() { - return queryId; - } - - public void setQueryId(final String queryId) { - this.queryId = queryId; - } - - public Set getSubscribedCounters() { - return subscribedCounters; - } - - public void setSubscribedCounters(final Set subscribedCounters) { - this.subscribedCounters = subscribedCounters; - } - - public Map getCurrentCounters() { - return currentCounters; - } - - public void setCurrentCounters(final Map currentCounters) { - this.currentCounters = currentCounters; - } - - public long getElapsedTime() { - return System.currentTimeMillis() - queryStartTime; - } - - public boolean isQueryCompleted() { - return queryCompleted; - } - - public void setQueryCompleted(final boolean queryCompleted) { - this.queryCompleted = queryCompleted; - this.queryEndTime = System.currentTimeMillis(); - } - - public void addWMEvent(WmEvent wmEvent) { - queryWmEvents.add(wmEvent); - } - - public long getQueryStartTime() { - return queryStartTime; - } - - public long getQueryEndTime() { - return queryEndTime; - } - - List getQueryWmEvents() { - return queryWmEvents; - } - - Future getReturnEventFuture() { - return returnEventFuture; - } - - public void setReturnEventFuture(final Future returnEventFuture) { - this.returnEventFuture = returnEventFuture; - } - - private static final String WM_EVENTS_HEADER_FORMAT = "%7s %24s %24s %11s %9s %13s"; - private static final String WM_EVENTS_TITLE = "Workload Manager Events Summary"; - private static final String WM_EVENTS_TABLE_HEADER = String.format(WM_EVENTS_HEADER_FORMAT, - "EVENT", "START_TIMESTAMP", "END_TIMESTAMP", "ELAPSED_MS", "CLUSTER %", "POOL"); - private static final DecimalFormat DECIMAL_FORMAT = new DecimalFormat("#0.00"); - - @Override - public void print(final SessionState.LogHelper console) { - try { - waitForReturnSessionEvent(); - boolean first = false; - console.printInfo(""); - console.printInfo(WM_EVENTS_TITLE); - - for 
(final WmEvent wmEvent : queryWmEvents) { - if (!first) { - console.printInfo(""); - console.printInfo("QueryId: " + queryId); - console.printInfo("SessionId: " + queryWmEvents.get(0).getWmTezSessionInfo().getSessionId()); - console.printInfo("Applied Triggers: " + getAppliedTriggers()); - console.printInfo(SEPARATOR); - console.printInfo(WM_EVENTS_TABLE_HEADER); - console.printInfo(SEPARATOR); - first = true; - } - WmEvent.WmTezSessionInfo wmTezSessionInfo = wmEvent.getWmTezSessionInfo(); - String row = String.format(WM_EVENTS_HEADER_FORMAT, - wmEvent.getEventType(), - Instant.ofEpochMilli(wmEvent.getEventStartTimestamp()).toString(), - Instant.ofEpochMilli(wmEvent.getEventEndTimestamp()).toString(), - wmEvent.getElapsedTime(), - DECIMAL_FORMAT.format(wmTezSessionInfo.getClusterPercent()), - wmTezSessionInfo.getPoolName()); - console.printInfo(row); - } - console.printInfo(SEPARATOR); - console.printInfo(""); - } catch (Exception e) { - LOG.warn("Unable to print WM events summary", e); - } - } - - // TODO: expose all WMContext's via /jmx to use in UI - public void printJson(final SessionState.LogHelper console) { - try { - waitForReturnSessionEvent(); - ObjectMapper objectMapper = new ObjectMapper(); - objectMapper.configure(SerializationConfig.Feature.FAIL_ON_EMPTY_BEANS, false); - // serialize json based on field annotations only - objectMapper.setVisibilityChecker(objectMapper.getSerializationConfig().getDefaultVisibilityChecker() - .withSetterVisibility(JsonAutoDetect.Visibility.NONE)); - String wmContextJson = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(this); - console.printInfo(""); - console.printInfo(WM_EVENTS_TITLE); - console.printInfo(SEPARATOR); - console.printInfo(wmContextJson); - console.printInfo(SEPARATOR); - console.printInfo(""); - } catch (Exception e) { - LOG.warn("Unable to serialize WMContext to json.", e); - } - } - - private void waitForReturnSessionEvent() throws ExecutionException, InterruptedException { - if (getReturnEventFuture() != null && !Thread.currentThread().isInterrupted()) { - getReturnEventFuture().get(); - } - } - - // prints short events information that are safe for consistent testing - public void shortPrint(final SessionState.LogHelper console) throws ExecutionException, InterruptedException { - waitForReturnSessionEvent(); - console.printInfo(WmContext.WM_EVENTS_TITLE, false); - for (WmEvent wmEvent : getQueryWmEvents()) { - console.printInfo("Event: " + wmEvent.getEventType() + - " Pool: " + wmEvent.getWmTezSessionInfo().getPoolName() + - " Cluster %: " + WmContext.DECIMAL_FORMAT.format(wmEvent.getWmTezSessionInfo().getClusterPercent())); - } - } -} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java index c58e4507f2..78df962a3a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java @@ -51,7 +51,6 @@ import org.apache.hadoop.hive.metastore.api.WMResourcePlan; import org.apache.hadoop.hive.ql.exec.tez.UserPoolMapping.MappingInput; import org.apache.hadoop.hive.ql.wm.SessionTriggerProvider; -import org.apache.hadoop.hive.ql.wm.WmContext; import org.apache.tez.dag.api.TezConfiguration; import org.junit.Test; import org.slf4j.Logger; @@ -87,7 +86,7 @@ public void run() { cdl.countDown(); } try { - session.set((WmTezSession) wm.getSession(old, new MappingInput(userName, null), conf, null)); + 
session.set((WmTezSession) wm.getSession(old, new MappingInput(userName, null), conf)); } catch (Throwable e) { error.compareAndSet(null, e); } @@ -186,11 +185,10 @@ protected WmTezSession createSessionObject(String sessionId, HiveConf conf) { } @Override - public WmTezSession getSession( - TezSessionState session, MappingInput input, HiveConf conf, - final WmContext wmContext) throws Exception { + public TezSessionState getSession( + TezSessionState session, MappingInput input, HiveConf conf) throws Exception { // We want to wait for the iteration to finish and set the cluster fraction. - WmTezSession state = super.getSession(session, input, conf, null); + TezSessionState state = super.getSession(session, input, conf); ensureWm(); return state; } @@ -229,17 +227,17 @@ public void testReuse() throws Exception { TezSessionState nonPool = mock(TezSessionState.class); when(nonPool.getConf()).thenReturn(conf); doNothing().when(nonPool).close(anyBoolean()); - TezSessionState session = wm.getSession(nonPool, new MappingInput("user", null), conf, null); + TezSessionState session = wm.getSession(nonPool, new MappingInput("user", null), conf); verify(nonPool).close(anyBoolean()); assertNotSame(nonPool, session); session.returnToSessionManager(); TezSessionPoolSession diffPool = mock(TezSessionPoolSession.class); when(diffPool.getConf()).thenReturn(conf); doNothing().when(diffPool).returnToSessionManager(); - session = wm.getSession(diffPool, new MappingInput("user", null), conf, null); + session = wm.getSession(diffPool, new MappingInput("user", null), conf); verify(diffPool).returnToSessionManager(); assertNotSame(diffPool, session); - TezSessionState session2 = wm.getSession(session, new MappingInput("user", null), conf, null); + TezSessionState session2 = wm.getSession(session, new MappingInput("user", null), conf); assertSame(session, session2); } @@ -251,11 +249,11 @@ public void testQueueName() throws Exception { wm.start(); // The queue should be ignored. 
conf.set(TezConfiguration.TEZ_QUEUE_NAME, "test2"); - TezSessionState session = wm.getSession(null, new MappingInput("user", null), conf, null); + TezSessionState session = wm.getSession(null, new MappingInput("user", null), conf); assertEquals("test", session.getQueueName()); assertEquals("test", conf.get(TezConfiguration.TEZ_QUEUE_NAME)); session.setQueueName("test2"); - session = wm.getSession(session, new MappingInput("user", null), conf, null); + session = wm.getSession(session, new MappingInput("user", null), conf); assertEquals("test", session.getQueueName()); } @@ -271,7 +269,7 @@ public void testReopen() throws Exception { WorkloadManager wm = new WorkloadManagerForTest("test", conf, 1, qam); wm.start(); WmTezSession session = (WmTezSession) wm.getSession( - null, new MappingInput("user", null), conf, null); + null, new MappingInput("user", null), conf); assertEquals(1.0, session.getClusterFraction(), EPSILON); qam.assertWasCalledAndReset(); WmTezSession session2 = (WmTezSession) session.reopen(conf, null); @@ -289,10 +287,10 @@ public void testDestroyAndReturn() throws Exception { MockQam qam = new MockQam(); WorkloadManager wm = new WorkloadManagerForTest("test", conf, 2, qam); wm.start(); - WmTezSession session = (WmTezSession) wm.getSession(null, new MappingInput("user", null), conf, null); + WmTezSession session = (WmTezSession) wm.getSession(null, new MappingInput("user", null), conf); assertEquals(1.0, session.getClusterFraction(), EPSILON); qam.assertWasCalledAndReset(); - WmTezSession session2 = (WmTezSession) wm.getSession(null, new MappingInput("user", null), conf, null); + WmTezSession session2 = (WmTezSession) wm.getSession(null, new MappingInput("user", null), conf); assertEquals(0.5, session.getClusterFraction(), EPSILON); assertEquals(0.5, session2.getClusterFraction(), EPSILON); qam.assertWasCalledAndReset(); @@ -303,7 +301,7 @@ public void testDestroyAndReturn() throws Exception { qam.assertWasCalledAndReset(); // We never lose pool session, so we should still be able to get. - session = (WmTezSession) wm.getSession(null, new MappingInput("user", null), conf, null); + session = (WmTezSession) wm.getSession(null, new MappingInput("user", null), conf); session.returnToSessionManager(); assertEquals(1.0, session2.getClusterFraction(), EPSILON); assertEquals(0.0, session.getClusterFraction(), EPSILON); @@ -324,20 +322,20 @@ public void testClusterFractions() throws Exception { assertEquals(5, wm.getNumSessions()); // Get all the 5 sessions; validate cluster fractions. 
WmTezSession session05of06 = (WmTezSession) wm.getSession( - null, new MappingInput("p1", null), conf, null); + null, new MappingInput("p1", null), conf); assertEquals(0.3, session05of06.getClusterFraction(), EPSILON); WmTezSession session03of06 = (WmTezSession) wm.getSession( - null, new MappingInput("p2", null), conf, null); + null, new MappingInput("p2", null), conf); assertEquals(0.18, session03of06.getClusterFraction(), EPSILON); WmTezSession session03of06_2 = (WmTezSession) wm.getSession( - null, new MappingInput("p2", null), conf, null); + null, new MappingInput("p2", null), conf); assertEquals(0.09, session03of06.getClusterFraction(), EPSILON); assertEquals(0.09, session03of06_2.getClusterFraction(), EPSILON); WmTezSession session02of06 = (WmTezSession) wm.getSession( - null,new MappingInput("r1", null), conf, null); + null,new MappingInput("r1", null), conf); assertEquals(0.12, session02of06.getClusterFraction(), EPSILON); WmTezSession session04 = (WmTezSession) wm.getSession( - null, new MappingInput("r2", null), conf, null); + null, new MappingInput("r2", null), conf); assertEquals(0.4, session04.getClusterFraction(), EPSILON); session05of06.returnToSessionManager(); session03of06.returnToSessionManager(); @@ -369,7 +367,7 @@ public void testMappings() throws Exception { private static void verifyMapping( WorkloadManager wm, HiveConf conf, MappingInput mi, String result) throws Exception { - WmTezSession session = (WmTezSession) wm.getSession(null, mi, conf, null); + WmTezSession session = (WmTezSession) wm.getSession(null, mi, conf); assertEquals(result, session.getPoolName()); session.returnToSessionManager(); } @@ -383,9 +381,9 @@ public void testQueueing() throws Exception { plan.setMappings(Lists.newArrayList(mapping("A", "A"), mapping("B", "B"))); final WorkloadManager wm = new WorkloadManagerForTest("test", conf, qam, plan); wm.start(); - WmTezSession sessionA1 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null), - sessionA2 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null), - sessionB1 = (WmTezSession) wm.getSession(null, new MappingInput("B", null), conf, null); + WmTezSession sessionA1 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf), + sessionA2 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf), + sessionB1 = (WmTezSession) wm.getSession(null, new MappingInput("B", null), conf); final AtomicReference sessionA3 = new AtomicReference<>(), sessionA4 = new AtomicReference<>(); final AtomicReference error = new AtomicReference<>(); @@ -399,7 +397,7 @@ public void testQueueing() throws Exception { assertNull(sessionA4.get()); checkError(error); // While threads are blocked on A, we should still be able to get and return a B session. 
- WmTezSession sessionB2 = (WmTezSession) wm.getSession(null, new MappingInput("B", null), conf, null); + WmTezSession sessionB2 = (WmTezSession) wm.getSession(null, new MappingInput("B", null), conf); sessionB1.returnToSessionManager(); sessionB2.returnToSessionManager(); assertNull(sessionA3.get()); @@ -427,8 +425,8 @@ public void testClusterChange() throws Exception { plan.getPlan().setDefaultPoolPath("A"); final WorkloadManager wm = new WorkloadManagerForTest("test", conf, qam, plan); wm.start(); - WmTezSession session1 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null), - session2 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null); + WmTezSession session1 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf), + session2 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf); assertEquals(0.5, session1.getClusterFraction(), EPSILON); assertEquals(0.5, session2.getClusterFraction(), EPSILON); qam.assertWasCalledAndReset(); @@ -450,19 +448,19 @@ public void testReuseWithQueueing() throws Exception { final WorkloadManager wm = new WorkloadManagerForTest("test", conf, 2, qam); wm.start(); WmTezSession session1 = (WmTezSession) wm.getSession( - null, new MappingInput("user", null), conf, null); + null, new MappingInput("user", null), conf); // First, try to reuse from the same pool - should "just work". WmTezSession session1a = (WmTezSession) wm.getSession( - session1, new MappingInput("user", null), conf, null); + session1, new MappingInput("user", null), conf); assertSame(session1, session1a); assertEquals(1.0, session1.getClusterFraction(), EPSILON); // Should still be able to get the 2nd session. WmTezSession session2 = (WmTezSession) wm.getSession( - null, new MappingInput("user", null), conf, null); + null, new MappingInput("user", null), conf); // Now try to reuse with no other sessions remaining. Should still work. WmTezSession session2a = (WmTezSession) wm.getSession( - session2, new MappingInput("user", null), conf, null); + session2, new MappingInput("user", null), conf); assertSame(session2, session2a); assertEquals(0.5, session1.getClusterFraction(), EPSILON); assertEquals(0.5, session2.getClusterFraction(), EPSILON); @@ -519,19 +517,19 @@ public void testReuseWithDifferentPool() throws Exception { plan.setMappings(Lists.newArrayList(mapping("A", "A"), mapping("B", "B"))); final WorkloadManager wm = new WorkloadManagerForTest("test", conf, qam, plan); wm.start(); - WmTezSession sessionA1 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null), - sessionA2 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null); + WmTezSession sessionA1 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf), + sessionA2 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf); assertEquals("A", sessionA1.getPoolName()); assertEquals(0.3f, sessionA1.getClusterFraction(), EPSILON); assertEquals("A", sessionA2.getPoolName()); assertEquals(0.3f, sessionA2.getClusterFraction(), EPSILON); - WmTezSession sessionB1 = (WmTezSession) wm.getSession(sessionA1, new MappingInput("B", null), conf, null); + WmTezSession sessionB1 = (WmTezSession) wm.getSession(sessionA1, new MappingInput("B", null), conf); assertSame(sessionA1, sessionB1); assertEquals("B", sessionB1.getPoolName()); assertEquals(0.4f, sessionB1.getClusterFraction(), EPSILON); assertEquals(0.6f, sessionA2.getClusterFraction(), EPSILON); // A1 removed from A. 
// Make sure that we can still get a session from A. - WmTezSession sessionA3 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null); + WmTezSession sessionA3 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf); assertEquals("A", sessionA3.getPoolName()); assertEquals(0.3f, sessionA3.getClusterFraction(), EPSILON); assertEquals(0.3f, sessionA3.getClusterFraction(), EPSILON); @@ -551,7 +549,7 @@ public void testApplyPlanUserMapping() throws Exception { wm.start(); // One session will be running, the other will be queued in "A" - WmTezSession sessionA1 = (WmTezSession) wm.getSession(null, new MappingInput("U", null), conf, null); + WmTezSession sessionA1 = (WmTezSession) wm.getSession(null, new MappingInput("U", null), conf); assertEquals("A", sessionA1.getPoolName()); assertEquals(0.5f, sessionA1.getClusterFraction(), EPSILON); final AtomicReference sessionA2 = new AtomicReference<>(); @@ -576,7 +574,7 @@ public void testApplyPlanUserMapping() throws Exception { assertEquals(0.4f, sessionA2.get().getClusterFraction(), EPSILON); // The new session will also go to B now. sessionA2.get().returnToSessionManager(); - WmTezSession sessionB1 = (WmTezSession) wm.getSession(null, new MappingInput("U", null), conf, null); + WmTezSession sessionB1 = (WmTezSession) wm.getSession(null, new MappingInput("U", null), conf); assertEquals("B", sessionB1.getPoolName()); assertEquals(0.4f, sessionB1.getClusterFraction(), EPSILON); sessionA1.returnToSessionManager(); @@ -600,11 +598,11 @@ public void testApplyPlanQpChanges() throws Exception { // A: 1/1 running, 1 queued; B: 2/2 running, C: 1/2 running, D: 1/1 running, 1 queued. // Total: 5/6 running. - WmTezSession sessionA1 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null), - sessionB1 = (WmTezSession) wm.getSession(null, new MappingInput("B", null), conf, null), - sessionB2 = (WmTezSession) wm.getSession(null, new MappingInput("B", null), conf, null), - sessionC1 = (WmTezSession) wm.getSession(null, new MappingInput("C", null), conf, null), - sessionD1 = (WmTezSession) wm.getSession(null, new MappingInput("D", null), conf, null); + WmTezSession sessionA1 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf), + sessionB1 = (WmTezSession) wm.getSession(null, new MappingInput("B", null), conf), + sessionB2 = (WmTezSession) wm.getSession(null, new MappingInput("B", null), conf), + sessionC1 = (WmTezSession) wm.getSession(null, new MappingInput("C", null), conf), + sessionD1 = (WmTezSession) wm.getSession(null, new MappingInput("D", null), conf); final AtomicReference sessionA2 = new AtomicReference<>(), sessionD2 = new AtomicReference<>(); final AtomicReference error = new AtomicReference<>(); @@ -740,7 +738,7 @@ public void testMoveSessions() throws Exception { final WorkloadManager wm = new WorkloadManagerForTest("test", conf, qam, plan); wm.start(); - WmTezSession sessionA1 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null); + WmTezSession sessionA1 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf); // [A: 1, B: 0] Map allSessionProviders = wm.getAllSessionTriggerProviders(); @@ -764,7 +762,7 @@ public void testMoveSessions() throws Exception { assertEquals(0.4f, sessionA1.getClusterFraction(), EPSILON); assertEquals("B", sessionA1.getPoolName()); - WmTezSession sessionA2 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null); + WmTezSession sessionA2 = (WmTezSession) wm.getSession(null, new 
MappingInput("A", null), conf); // [A: 1, B: 1] allSessionProviders = wm.getAllSessionTriggerProviders(); assertEquals(1, allSessionProviders.get("A").getSessions().size()); @@ -791,7 +789,7 @@ public void testMoveSessions() throws Exception { assertEquals("B", sessionA2.getPoolName()); assertEquals("B", sessionA1.getPoolName()); - WmTezSession sessionA3 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null); + WmTezSession sessionA3 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf); // [A: 1, B: 2] allSessionProviders = wm.getAllSessionTriggerProviders(); assertEquals(1, allSessionProviders.get("A").getSessions().size()); @@ -831,7 +829,7 @@ public void testMoveSessionsMultiPool() throws Exception { final WorkloadManager wm = new WorkloadManagerForTest("test", conf, qam, plan); wm.start(); - WmTezSession sessionA1 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null); + WmTezSession sessionA1 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf); // [A: 1, B: 0, B.x: 0, B.y: 0, C: 0] Map allSessionProviders = wm.getAllSessionTriggerProviders(); @@ -889,7 +887,7 @@ public void testMoveSessionsMultiPool() throws Exception { assertTrue(allSessionProviders.get("B.x").getSessions().contains(sessionA1)); assertEquals("B.x", sessionA1.getPoolName()); - WmTezSession sessionA2 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null); + WmTezSession sessionA2 = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf); // [A: 1, B: 0, B.x: 1, B.y: 0, C: 0] allSessionProviders = wm.getAllSessionTriggerProviders(); assertEquals(1, allSessionProviders.get("A").getSessions().size()); @@ -988,7 +986,7 @@ public void testAsyncSessionInitFailures() throws Exception { failedWait.setException(new Exception("foo")); theOnlySession.setWaitForAmRegistryFuture(failedWait); try { - TezSessionState r = wm.getSession(null, new MappingInput("A", null), conf, null); + TezSessionState r = wm.getSession(null, new MappingInput("A", null), conf); fail("Expected an error but got " + r); } catch (Exception ex) { // Expected. @@ -1039,7 +1037,7 @@ private SampleTezSessionState validatePoolAfterCleanup( assertEquals(0f, oldSession.getClusterFraction(), EPSILON); pool.returnSession(theOnlySession); // Make sure we can actually get a session still - parallelism/etc. should not be affected. 
- WmTezSession result = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf, null); + WmTezSession result = (WmTezSession) wm.getSession(null, new MappingInput("A", null), conf); assertEquals(sessionPoolName, result.getPoolName()); assertEquals(1f, result.getClusterFraction(), EPSILON); result.returnToSessionManager(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java index e89f2e5a02..fb791160c6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.io.UnsupportedEncodingException; -import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import java.util.Arrays; import java.util.Random; @@ -254,29 +253,6 @@ public static VectorizedRowBatch getVectorizedRowBatchLongInLongOut() { return batch; } - public static VectorizedRowBatch getVectorizedRowBatchStringInLongOut() { - VectorizedRowBatch batch = new VectorizedRowBatch(2); - BytesColumnVector inV; - LongColumnVector outV; - inV = new BytesColumnVector(); - outV = new LongColumnVector(); - inV.initBuffer(); - inV.setVal(0, StandardCharsets.UTF_8.encode("true").array()); - inV.setVal(1, StandardCharsets.UTF_8.encode("TRUE").array()); - inV.setVal(2, StandardCharsets.UTF_8.encode("TrUe").array()); - inV.setVal(3, StandardCharsets.UTF_8.encode("false").array()); - inV.setVal(4, StandardCharsets.UTF_8.encode("FALSE").array()); - inV.setVal(5, StandardCharsets.UTF_8.encode("FaLsE").array()); - inV.setVal(6, StandardCharsets.UTF_8.encode("").array()); - inV.setVal(7, StandardCharsets.UTF_8.encode("Other").array()); - - batch.cols[0] = inV; - batch.cols[1] = outV; - - batch.size = 8; - return batch; - } - public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[] longValues) { Random r = new Random(345); VectorizedRowBatch batch = new VectorizedRowBatch(2); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java index 6952b4598f..fb8035b687 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java @@ -128,23 +128,6 @@ public void testCastLongToBoolean() { } @Test - public void testCastStringToBoolean() { - VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchStringInLongOut(); - LongColumnVector resultV = (LongColumnVector) b.cols[1]; - b.cols[0].noNulls = true; - VectorExpression expr = new CastStringToBoolean(0, 1); - expr.evaluate(b); - Assert.assertEquals(1, resultV.vector[0]); // true - Assert.assertEquals(1, resultV.vector[1]); // true - Assert.assertEquals(1, resultV.vector[2]); // true - Assert.assertEquals(0, resultV.vector[3]); // false - Assert.assertEquals(0, resultV.vector[4]); // false - Assert.assertEquals(0, resultV.vector[5]); // false - Assert.assertEquals(0, resultV.vector[6]); // false - Assert.assertEquals(1, resultV.vector[7]); // true - } - - @Test public void testCastLongToTimestamp() { long[] longValues = new long[500]; VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInTimestampOut(longValues); 
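The two test removals above (getVectorizedRowBatchStringInLongOut and testCastStringToBoolean), together with the UDFToBoolean.evaluate(Text) change earlier in this diff, revert string-to-boolean casting to the older "non-empty string is true" behaviour. The following is a minimal illustrative sketch of that restored semantics, not part of the patch; it reuses org.apache.hadoop.io.Text as the UDF does, and the class name is invented for the example:

    import org.apache.hadoop.io.Text;

    public class NonEmptyStringToBooleanSketch {
      public static void main(String[] args) {
        for (String s : new String[] {"true", "FALSE", "", "Other"}) {
          Text t = new Text(s);
          // Mirrors booleanWritable.set(i.getLength() != 0) in the reverted evaluate(Text):
          // only the empty string maps to false, so "FALSE" and "Other" both map to true.
          System.out.println("'" + s + "' -> " + (t.getLength() != 0));
        }
      }
    }

Under this behaviour a literal such as 'FALSE' casts to true simply because it is non-empty, which is consistent with dropping the case-insensitive 'TRUE'/'FALSE' cast queries from udf_to_boolean.q later in this diff.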
diff --git a/ql/src/test/queries/clientpositive/groupby_position.q b/ql/src/test/queries/clientpositive/groupby_position.q index 446b99d3cb..f7e257d8fa 100644 --- a/ql/src/test/queries/clientpositive/groupby_position.q +++ b/ql/src/test/queries/clientpositive/groupby_position.q @@ -71,14 +71,3 @@ FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 ) c ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC; - -set hive.cbo.enable=false; -EXPLAIN -SELECT key FROM src ORDER BY 1; - -SELECT key FROM src ORDER BY 1; - -EXPLAIN -SELECT distinct key FROM src ORDER BY 1; - -SELECT distinct key FROM src ORDER BY 1; diff --git a/ql/src/test/queries/clientpositive/subquery_corr.q b/ql/src/test/queries/clientpositive/subquery_corr.q new file mode 100644 index 0000000000..10b4c3adb6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/subquery_corr.q @@ -0,0 +1,12 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; + +-- inner query has non-equi correlated predicate, this shouldn't have value gen +explain select * from src b where b.key in (select key from src a where b.value > a.value); +select * from src b where b.key in (select key from src a where b.value > a.value); + +explain select * from src b where b.key in (select key from src a where b.value <= a.value); +select * from src b where b.key in (select key from src a where b.value <= a.value); + +explain select * from src b where b.key in (select key from src a where b.value > a.value and b.key < a.key) ; +select * from src b where b.key in (select key from src a where b.value > a.value and b.key < a.key) ; diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q index 7d4ece9dca..a14bc47ee7 100644 --- a/ql/src/test/queries/clientpositive/subquery_in.q +++ b/ql/src/test/queries/clientpositive/subquery_in.q @@ -301,5 +301,3 @@ select * from part where p_size IN (select max(p_size) from part p where p.p_typ -- inner query has join so should have a join with outer query to fetch all corr values explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name); select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name); - - diff --git a/ql/src/test/queries/clientpositive/udf_to_boolean.q b/ql/src/test/queries/clientpositive/udf_to_boolean.q index 1a50d055d5..8bea7abcbc 100644 --- a/ql/src/test/queries/clientpositive/udf_to_boolean.q +++ b/ql/src/test/queries/clientpositive/udf_to_boolean.q @@ -12,9 +12,6 @@ SELECT CAST(CAST(-8.0 AS DOUBLE) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST(-99.0 AS DECIMAL) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST('Foo' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); -SELECT CAST(CAST('TRUE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); -SELECT CAST(CAST('true' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); -SELECT CAST(CAST('TrUe' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST('2011-05-06 07:08:09' as timestamp) AS BOOLEAN) FROM src tablesample (1 rows); @@ -30,9 +27,6 @@ SELECT CAST(CAST(0.0 AS DOUBLE) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST(0.0 AS DECIMAL) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST('' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); -SELECT CAST(CAST('FALSE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); -SELECT CAST(CAST('false' AS STRING) AS BOOLEAN) FROM src 
tablesample (1 rows); -SELECT CAST(CAST('FaLsE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST(0 as timestamp) AS BOOLEAN) FROM src tablesample (1 rows); diff --git a/ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q b/ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q deleted file mode 100644 index eeb5ab8819..0000000000 --- a/ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q +++ /dev/null @@ -1,23 +0,0 @@ -set hive.mapred.mode=nonstrict; -SET hive.vectorized.execution.enabled = true; -SET hive.int.timestamp.conversion.in.seconds=false; -set hive.fetch.task.conversion=none; - -create table t (s string) stored as orc; - -insert into t values ('false'); -insert into t values ('FALSE'); -insert into t values ('FaLsE'); -insert into t values ('true'); -insert into t values ('TRUE'); -insert into t values ('TrUe'); -insert into t values (''); -insert into t values ('Other'); -insert into t values ('Off'); -insert into t values ('No'); -insert into t values ('0'); -insert into t values ('1'); - -explain select s,cast(s as boolean) from t order by s; - -select s,cast(s as boolean) from t order by s; diff --git a/ql/src/test/results/clientpositive/groupby_position.q.out b/ql/src/test/results/clientpositive/groupby_position.q.out index 7351a06f9c..16924f3459 100644 --- a/ql/src/test/results/clientpositive/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/groupby_position.q.out @@ -703,944 +703,3 @@ POSTHOOK: Input: default@src 19 val_19 19 val_19 18 val_18 18 val_18 17 val_17 17 val_17 -PREHOOK: query: EXPLAIN -SELECT key FROM src ORDER BY 1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT key FROM src ORDER BY 1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT key FROM src ORDER BY 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM src ORDER BY 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 -0 -0 -10 -100 -100 -103 -103 -104 -104 -105 -11 -111 -113 -113 -114 -116 -118 -118 -119 -119 -119 -12 -12 -120 -120 -125 -125 -126 -128 -128 -128 -129 -129 -131 -133 -134 -134 -136 -137 -137 -138 -138 -138 -138 -143 -145 -146 -146 -149 -149 -15 -15 -150 -152 -152 -153 -155 -156 -157 -158 -160 -162 -163 -164 -164 
-165 -165 -166 -167 -167 -167 -168 -169 -169 -169 -169 -17 -170 -172 -172 -174 -174 -175 -175 -176 -176 -177 -178 -179 -179 -18 -18 -180 -181 -183 -186 -187 -187 -187 -189 -19 -190 -191 -191 -192 -193 -193 -193 -194 -195 -195 -196 -197 -197 -199 -199 -199 -2 -20 -200 -200 -201 -202 -203 -203 -205 -205 -207 -207 -208 -208 -208 -209 -209 -213 -213 -214 -216 -216 -217 -217 -218 -219 -219 -221 -221 -222 -223 -223 -224 -224 -226 -228 -229 -229 -230 -230 -230 -230 -230 -233 -233 -235 -237 -237 -238 -238 -239 -239 -24 -24 -241 -242 -242 -244 -247 -248 -249 -252 -255 -255 -256 -256 -257 -258 -26 -26 -260 -262 -263 -265 -265 -266 -27 -272 -272 -273 -273 -273 -274 -275 -277 -277 -277 -277 -278 -278 -28 -280 -280 -281 -281 -282 -282 -283 -284 -285 -286 -287 -288 -288 -289 -291 -292 -296 -298 -298 -298 -30 -302 -305 -306 -307 -307 -308 -309 -309 -310 -311 -311 -311 -315 -316 -316 -316 -317 -317 -318 -318 -318 -321 -321 -322 -322 -323 -325 -325 -327 -327 -327 -33 -331 -331 -332 -333 -333 -335 -336 -338 -339 -34 -341 -342 -342 -344 -344 -345 -348 -348 -348 -348 -348 -35 -35 -35 -351 -353 -353 -356 -360 -362 -364 -365 -366 -367 -367 -368 -369 -369 -369 -37 -37 -373 -374 -375 -377 -378 -379 -382 -382 -384 -384 -384 -386 -389 -392 -393 -394 -395 -395 -396 -396 -396 -397 -397 -399 -399 -4 -400 -401 -401 -401 -401 -401 -402 -403 -403 -403 -404 -404 -406 -406 -406 -406 -407 -409 -409 -409 -41 -411 -413 -413 -414 -414 -417 -417 -417 -418 -419 -42 -42 -421 -424 -424 -427 -429 -429 -43 -430 -430 -430 -431 -431 -431 -432 -435 -436 -437 -438 -438 -438 -439 -439 -44 -443 -444 -446 -448 -449 -452 -453 -454 -454 -454 -455 -457 -458 -458 -459 -459 -460 -462 -462 -463 -463 -466 -466 -466 -467 -468 -468 -468 -468 -469 -469 -469 -469 -469 -47 -470 -472 -475 -477 -478 -478 -479 -480 -480 -480 -481 -482 -483 -484 -485 -487 -489 -489 -489 -489 -490 -491 -492 -492 -493 -494 -495 -496 -497 -498 -498 -498 -5 -5 -5 -51 -51 -53 -54 -57 -58 -58 -64 -65 -66 -67 -67 -69 -70 -70 -70 -72 -72 -74 -76 -76 -77 -78 -8 -80 -82 -83 -83 -84 -84 -85 -86 -87 -9 -90 -90 -90 -92 -95 -95 -96 -97 -97 -98 -98 -PREHOOK: query: EXPLAIN -SELECT distinct key FROM src ORDER BY 1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT distinct key FROM src ORDER BY 1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT distinct key FROM src ORDER BY 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT distinct key FROM src ORDER BY 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 -10 -100 -103 -104 -105 -11 -111 -113 -114 -116 -118 -119 -12 -120 -125 -126 -128 -129 -131 -133 -134 -136 -137 -138 -143 -145 -146 -149 -15 -150 -152 -153 -155 -156 -157 -158 -160 -162 -163 -164 -165 -166 -167 -168 -169 -17 -170 -172 -174 -175 -176 -177 -178 -179 -18 -180 -181 -183 -186 -187 -189 -19 -190 -191 -192 -193 -194 -195 -196 -197 -199 -2 -20 -200 -201 -202 -203 -205 -207 -208 -209 -213 -214 -216 -217 -218 -219 -221 -222 -223 -224 -226 -228 -229 -230 -233 -235 -237 -238 -239 -24 -241 -242 -244 -247 -248 -249 -252 -255 -256 -257 -258 -26 -260 -262 -263 -265 -266 -27 -272 -273 -274 -275 -277 -278 -28 -280 -281 -282 -283 -284 -285 -286 -287 -288 -289 -291 -292 -296 -298 -30 -302 -305 -306 -307 -308 -309 -310 -311 -315 -316 -317 -318 -321 -322 -323 -325 -327 -33 -331 -332 -333 -335 -336 -338 -339 -34 -341 -342 -344 -345 -348 -35 -351 -353 -356 -360 -362 -364 -365 -366 -367 -368 -369 -37 -373 -374 -375 -377 -378 -379 -382 -384 -386 -389 -392 -393 -394 -395 -396 -397 -399 -4 -400 -401 -402 -403 -404 -406 -407 -409 -41 -411 -413 -414 -417 -418 -419 -42 -421 -424 -427 -429 -43 -430 -431 -432 -435 -436 -437 -438 -439 -44 -443 -444 -446 -448 -449 -452 -453 -454 -455 -457 -458 -459 -460 -462 -463 -466 -467 -468 -469 -47 -470 -472 -475 -477 -478 -479 -480 -481 -482 -483 -484 -485 -487 -489 -490 -491 -492 -493 -494 -495 -496 -497 -498 -5 -51 -53 -54 -57 -58 -64 -65 -66 -67 -69 -70 -72 -74 -76 -77 -78 -8 -80 -82 -83 -84 -85 -86 -87 -9 -90 -92 -95 -96 -97 -98 diff --git a/ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out b/ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out index 143742b3be..92c4eafd54 100644 --- a/ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out @@ -107,7 +107,6 @@ Table Parameters: rawDataSize 0 totalSize 0 transactional true - transactional_properties default #### A masked pattern was here #### # Storage Information @@ -173,7 +172,6 @@ Table Parameters: rawDataSize 0 totalSize 295399 transactional true - transactional_properties default #### A masked pattern was here #### # Storage Information @@ -375,9 +373,8 @@ Table Parameters: numFiles 1 numRows 0 rawDataSize 0 - totalSize 1554 + totalSize 1555 
transactional true - transactional_properties default #### A masked pattern was here #### # Storage Information @@ -410,9 +407,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_ivot - Statistics: Num rows: 1 Data size: 1554 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1555 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 1554 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1555 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -510,7 +507,6 @@ Table Parameters: rawDataSize 0 totalSize 3109 transactional true - transactional_properties default #### A masked pattern was here #### # Storage Information @@ -641,7 +637,6 @@ Table Parameters: rawDataSize 0 totalSize 298508 transactional true - transactional_properties default #### A masked pattern was here #### # Storage Information @@ -907,17 +902,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sp - Statistics: Num rows: 1 Data size: 5820 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 5820 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -928,10 +923,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/subquery_corr.q.out b/ql/src/test/results/clientpositive/llap/subquery_corr.q.out new file mode 100644 index 0000000000..5fa47d26df --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/subquery_corr.q.out @@ -0,0 +1,797 @@ +PREHOOK: query: explain select * from src b where b.key in (select key from src a where b.value > a.value) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src b where b.key in (select key from src a where b.value > a.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is 
not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + residual filter predicates: {(_col1 > _col3)} + Statistics: Num rows: 134 Data size: 36046 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 134 Data size: 23852 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 134 Data size: 23852 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src b where b.key in (select key from src a where b.value > a.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src b where b.key in (select key from src a where b.value > a.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain select * from src b where b.key in (select key from src a where b.value <= a.value) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src b where b.key in (select key from src a where b.value <= a.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: 
+ Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + residual filter predicates: {(_col1 <= _col3)} + Statistics: Num rows: 134 Data size: 36046 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 134 Data size: 23852 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 134 Data size: 23852 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src b where b.key in (select key from src a where b.value <= a.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src b where b.key in (select key from src a where b.value <= a.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 
val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 
val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 +403 val_403 +403 val_403 +403 val_403 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +407 val_407 +409 val_409 +409 val_409 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +413 val_413 +414 val_414 +414 val_414 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +419 val_419 +42 val_42 +42 val_42 +421 val_421 +424 val_424 +424 val_424 +427 val_427 +429 val_429 +429 val_429 +43 val_43 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 val_431 +432 val_432 +435 val_435 +436 val_436 +437 val_437 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +44 val_44 +443 val_443 +444 val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 +454 val_454 +454 val_454 +454 val_454 +455 val_455 +457 val_457 +458 val_458 +458 val_458 +459 val_459 +459 val_459 +460 val_460 +462 val_462 +462 val_462 +463 val_463 +463 val_463 +466 val_466 +466 val_466 +466 val_466 +467 val_467 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +47 val_47 +470 val_470 +472 val_472 +475 val_475 +477 val_477 +478 val_478 +478 val_478 +479 val_479 +480 val_480 +480 val_480 +480 val_480 +481 val_481 +482 val_482 +483 val_483 +484 val_484 +485 val_485 +487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +490 val_490 +491 val_491 +492 val_492 +492 val_492 +493 val_493 +494 val_494 +495 val_495 +496 val_496 +497 val_497 +498 val_498 +498 val_498 +498 val_498 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +53 val_53 +54 val_54 +57 val_57 +58 val_58 +58 val_58 +64 val_64 +65 val_65 +66 val_66 +67 val_67 +67 val_67 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +74 val_74 +76 val_76 +76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: explain select * from src b where b.key in (select key from src a where b.value > a.value and b.key < a.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src b where b.key in (select key from src a where b.value > a.value and b.key < a.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + 
+ Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 132500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 132500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 132500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + residual filter predicates: {(_col0 < _col3)} {(_col1 > _col4)} + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src b where b.key in (select key from src a where b.value > a.value and b.key < a.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src b where b.key in (select key from src a where b.value > a.value and b.key < a.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out index dfe424046e..de1f7aeed3 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -1326,7 +1326,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - residual filter predicates: {(_col1 <> _col3)} + residual filter predicates: {(_col3 <> _col1)} Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int) diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out 
b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 5dcdfdd15f..df0dc8508d 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -885,7 +885,6 @@ POSTHOOK: Input: default@src 97 val_97 98 val_98 98 val_98 -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from src b where b.key in (select distinct key from src a where a.value > b.value) PREHOOK: type: QUERY POSTHOOK: query: explain select * from src b where b.key in (select distinct key from src a where a.value > b.value) @@ -900,8 +899,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -910,18 +908,6 @@ STAGE PLANS: alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -929,29 +915,24 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: value (type: string) + keys: key (type: string), value (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: string) + sort order: 
++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -961,55 +942,41 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - residual filter predicates: {(_col1 > _col2)} - Statistics: Num rows: 41666 Data size: 11208154 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + residual filter predicates: {(_col3 > _col1)} + Statistics: Num rows: 67 Data size: 18023 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 41666 Data size: 7416548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10609 Data size: 1888402 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 10609 Data size: 1888402 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 + Statistics: Num rows: 67 Data size: 11926 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 67 Data size: 11926 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 22250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 22250 Basic stats: COMPLETE Column stats: COMPLETE 
+ value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -1017,7 +984,6 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from src b where b.key in (select distinct key from src a where a.value > b.value) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -1026,503 +992,6 @@ POSTHOOK: query: select * from src b where b.key in (select distinct key from sr POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -10 val_10 -100 val_100 -100 val_100 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -105 val_105 -11 val_11 -111 val_111 -113 val_113 -113 val_113 -114 val_114 -116 val_116 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -12 val_12 -12 val_12 -120 val_120 -120 val_120 -125 val_125 -125 val_125 -126 val_126 -128 val_128 -128 val_128 -128 val_128 -129 val_129 -129 val_129 -131 val_131 -133 val_133 -134 val_134 -134 val_134 -136 val_136 -137 val_137 -137 val_137 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -143 val_143 -145 val_145 -146 val_146 -146 val_146 -149 val_149 -149 val_149 -15 val_15 -15 val_15 -150 val_150 -152 val_152 -152 val_152 -153 val_153 -155 val_155 -156 val_156 -157 val_157 -158 val_158 -160 val_160 -162 val_162 -163 val_163 -164 val_164 -164 val_164 -165 val_165 -165 val_165 -166 val_166 -167 val_167 -167 val_167 -167 val_167 -168 val_168 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -17 val_17 -170 val_170 -172 val_172 -172 val_172 -174 val_174 -174 val_174 -175 val_175 -175 val_175 -176 val_176 -176 val_176 -177 val_177 -178 val_178 -179 val_179 -179 val_179 -18 val_18 -18 val_18 -180 val_180 -181 val_181 -183 val_183 -186 val_186 -187 val_187 -187 val_187 -187 val_187 -189 val_189 -19 val_19 -190 val_190 -191 val_191 -191 val_191 -192 val_192 -193 val_193 -193 val_193 -193 val_193 -194 val_194 -195 val_195 -195 val_195 -196 val_196 -197 val_197 -197 val_197 -199 val_199 -199 val_199 -199 val_199 -2 val_2 -20 val_20 -200 val_200 -200 val_200 -201 val_201 -202 val_202 -203 val_203 -203 val_203 -205 val_205 -205 val_205 -207 val_207 -207 val_207 -208 val_208 -208 val_208 -208 val_208 -209 val_209 -209 val_209 -213 val_213 -213 val_213 -214 val_214 -216 val_216 -216 val_216 -217 val_217 -217 val_217 -218 val_218 -219 val_219 -219 val_219 -221 val_221 -221 val_221 -222 val_222 -223 val_223 -223 val_223 -224 val_224 -224 val_224 -226 val_226 -228 val_228 -229 val_229 -229 val_229 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -233 val_233 -233 val_233 -235 val_235 -237 val_237 -237 val_237 -238 val_238 -238 val_238 -239 val_239 -239 val_239 -24 val_24 -24 val_24 -241 val_241 -242 val_242 -242 val_242 -244 val_244 -247 val_247 -248 val_248 -249 val_249 -252 val_252 -255 val_255 -255 val_255 -256 val_256 -256 val_256 -257 val_257 -258 val_258 -26 val_26 -26 val_26 -260 val_260 -262 val_262 -263 val_263 -265 val_265 -265 val_265 -266 val_266 -27 val_27 -272 val_272 -272 val_272 -273 val_273 -273 val_273 -273 val_273 -274 val_274 -275 val_275 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -278 val_278 -278 val_278 -28 val_28 -280 val_280 -280 val_280 -281 val_281 -281 val_281 -282 val_282 -282 val_282 -283 val_283 -284 val_284 -285 val_285 -286 val_286 -287 val_287 -288 val_288 -288 val_288 -289 val_289 -291 val_291 -292 val_292 -296 val_296 -298 val_298 -298 val_298 -298 val_298 -30 val_30 -302 val_302 -305 val_305 -306 val_306 -307 val_307 -307 
val_307 -308 val_308 -309 val_309 -309 val_309 -310 val_310 -311 val_311 -311 val_311 -311 val_311 -315 val_315 -316 val_316 -316 val_316 -316 val_316 -317 val_317 -317 val_317 -318 val_318 -318 val_318 -318 val_318 -321 val_321 -321 val_321 -322 val_322 -322 val_322 -323 val_323 -325 val_325 -325 val_325 -327 val_327 -327 val_327 -327 val_327 -33 val_33 -331 val_331 -331 val_331 -332 val_332 -333 val_333 -333 val_333 -335 val_335 -336 val_336 -338 val_338 -339 val_339 -34 val_34 -341 val_341 -342 val_342 -342 val_342 -344 val_344 -344 val_344 -345 val_345 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -35 val_35 -35 val_35 -35 val_35 -351 val_351 -353 val_353 -353 val_353 -356 val_356 -360 val_360 -362 val_362 -364 val_364 -365 val_365 -366 val_366 -367 val_367 -367 val_367 -368 val_368 -369 val_369 -369 val_369 -369 val_369 -37 val_37 -37 val_37 -373 val_373 -374 val_374 -375 val_375 -377 val_377 -378 val_378 -379 val_379 -382 val_382 -382 val_382 -384 val_384 -384 val_384 -384 val_384 -386 val_386 -389 val_389 -392 val_392 -393 val_393 -394 val_394 -395 val_395 -395 val_395 -396 val_396 -396 val_396 -396 val_396 -397 val_397 -397 val_397 -399 val_399 -399 val_399 -4 val_4 -400 val_400 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -402 val_402 -403 val_403 -403 val_403 -403 val_403 -404 val_404 -404 val_404 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -407 val_407 -409 val_409 -409 val_409 -409 val_409 -41 val_41 -411 val_411 -413 val_413 -413 val_413 -414 val_414 -414 val_414 -417 val_417 -417 val_417 -417 val_417 -418 val_418 -419 val_419 -42 val_42 -42 val_42 -421 val_421 -424 val_424 -424 val_424 -427 val_427 -429 val_429 -429 val_429 -43 val_43 -430 val_430 -430 val_430 -430 val_430 -431 val_431 -431 val_431 -431 val_431 -432 val_432 -435 val_435 -436 val_436 -437 val_437 -438 val_438 -438 val_438 -438 val_438 -439 val_439 -439 val_439 -44 val_44 -443 val_443 -444 val_444 -446 val_446 -448 val_448 -449 val_449 -452 val_452 -453 val_453 -454 val_454 -454 val_454 -454 val_454 -455 val_455 -457 val_457 -458 val_458 -458 val_458 -459 val_459 -459 val_459 -460 val_460 -462 val_462 -462 val_462 -463 val_463 -463 val_463 -466 val_466 -466 val_466 -466 val_466 -467 val_467 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -47 val_47 -470 val_470 -472 val_472 -475 val_475 -477 val_477 -478 val_478 -478 val_478 -479 val_479 -480 val_480 -480 val_480 -480 val_480 -481 val_481 -482 val_482 -483 val_483 -484 val_484 -485 val_485 -487 val_487 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -490 val_490 -491 val_491 -492 val_492 -492 val_492 -493 val_493 -494 val_494 -495 val_495 -496 val_496 -497 val_497 -498 val_498 -498 val_498 -498 val_498 -5 val_5 -5 val_5 -5 val_5 -51 val_51 -51 val_51 -53 val_53 -54 val_54 -57 val_57 -58 val_58 -58 val_58 -64 val_64 -65 val_65 -66 val_66 -67 val_67 -67 val_67 -69 val_69 -70 val_70 -70 val_70 -70 val_70 -72 val_72 -72 val_72 -74 val_74 -76 val_76 -76 val_76 -77 val_77 -78 val_78 -8 val_8 -80 val_80 -82 val_82 -83 val_83 -83 val_83 -84 val_84 -84 val_84 -85 val_85 -86 val_86 -87 val_87 -9 val_9 -90 val_90 -90 val_90 -90 val_90 -92 val_92 -95 val_95 -95 val_95 -96 val_96 -97 val_97 -97 val_97 -98 val_98 -98 val_98 PREHOOK: query: select p_mfgr, p_name, p_size from part where part.p_size in diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out index 
0ffbaaea34..50ac65630f 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out @@ -1623,7 +1623,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Reducer 11 - residual filter predicates: {(_col1 <> _col2)} + residual filter predicates: {(_col2 <> _col1)} Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col3 is null (type: boolean) @@ -1668,7 +1668,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Reducer 12 - residual filter predicates: {(_col1 <> _col2)} + residual filter predicates: {(_col2 <> _col1)} Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col3 is null (type: boolean) @@ -1711,7 +1711,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Reducer 13 - residual filter predicates: {(_col1 <> _col2)} + residual filter predicates: {(_col2 <> _col1)} Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col3 is null (type: boolean) diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 5da12584f0..a7233e848a 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -7130,8 +7130,7 @@ PREHOOK: query: drop table t1 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table t1 POSTHOOK: type: DROPTABLE -Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[57][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from src b where b.key not in @@ -7157,14 +7156,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 8 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) - Reducer 4 <- Map 1 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Map 1 (XPROD_EDGE), Reducer 10 (XPROD_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -7194,19 +7191,21 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string) Filter Operator - predicate: (key > '9') (type: boolean) + predicate: ((key > '9') and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) + Group By Operator + keys: key (type: string), value (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: - 
Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: b @@ -7221,25 +7220,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Reducer 10 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 22750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -7252,11 +7234,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 500 Data size: 91672 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 91672 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: bigint), _col4 (type: bigint) + value expressions: _col1 (type: string), _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -7264,20 +7246,21 @@ STAGE PLANS: condition map: Left Outer Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4, _col7 - Statistics: Num rows: 5833 Data size: 1149606 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7 + residual filter predicates: {(_col1 > _col6)} + Statistics: Num rows: 500 Data size: 104497 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 2917 Data size: 574910 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 52304 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2917 Data size: 519226 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2917 Data size: 519226 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -7328,51 +7311,25 @@ STAGE PLANS: Reducer 6 Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - residual filter predicates: {(_col2 > _col1)} - Statistics: Num rows: 13833 Data size: 3721077 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 13833 Data size: 3721077 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3605 Data size: 641690 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 3605 Data size: 641690 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 - Execution mode: llap - Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3605 Data size: 641690 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 3605 Data size: 641690 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3605 Data size: 656110 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 3605 Data size: 656110 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: boolean) - Reducer 9 + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean) + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -7391,8 +7348,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[57][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from src b where b.key not in diff --git a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out index 84b4d9454d..ad6d97b7dd 100644 --- 
a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out @@ -182,8 +182,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 21, 0, 1, 2, 3, 22, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73] - selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastStringToBoolean(col 6) -> 21:boolean, CastDoubleToLong(col 4:float) -> 22:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 56:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, VectorUDFAdaptor(UDFToString(ctimestamp1)) -> 65:string, CastStringGroupToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, 
maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: col 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double + projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 22, 0, 1, 2, 3, 21, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73] + selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastLongToBooleanViaLongToLong(col 21:bigint)(children: StringLength(col 6:string) -> 21:bigint) -> 22:boolean, CastDoubleToLong(col 4:float) -> 21:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 56:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 
2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, VectorUDFAdaptor(UDFToString(ctimestamp1)) -> 65:string, CastStringGroupToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: col 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double Statistics: Num rows: 6144 Data size: 16362860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/spark/groupby_position.q.out b/ql/src/test/results/clientpositive/spark/groupby_position.q.out index bcc512be09..a88176fa6b 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_position.q.out @@ -692,946 +692,3 @@ POSTHOOK: Input: default@src 19 val_19 19 val_19 18 val_18 18 val_18 17 val_17 17 val_17 -PREHOOK: query: EXPLAIN -SELECT key FROM src ORDER BY 1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT key FROM src ORDER BY 1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT key FROM src ORDER BY 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM src ORDER BY 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 -0 -0 -10 -100 -100 -103 -103 -104 -104 -105 -11 -111 -113 -113 -114 -116 -118 -118 -119 -119 -119 -12 -12 -120 
-120 -125 -125 -126 -128 -128 -128 -129 -129 -131 -133 -134 -134 -136 -137 -137 -138 -138 -138 -138 -143 -145 -146 -146 -149 -149 -15 -15 -150 -152 -152 -153 -155 -156 -157 -158 -160 -162 -163 -164 -164 -165 -165 -166 -167 -167 -167 -168 -169 -169 -169 -169 -17 -170 -172 -172 -174 -174 -175 -175 -176 -176 -177 -178 -179 -179 -18 -18 -180 -181 -183 -186 -187 -187 -187 -189 -19 -190 -191 -191 -192 -193 -193 -193 -194 -195 -195 -196 -197 -197 -199 -199 -199 -2 -20 -200 -200 -201 -202 -203 -203 -205 -205 -207 -207 -208 -208 -208 -209 -209 -213 -213 -214 -216 -216 -217 -217 -218 -219 -219 -221 -221 -222 -223 -223 -224 -224 -226 -228 -229 -229 -230 -230 -230 -230 -230 -233 -233 -235 -237 -237 -238 -238 -239 -239 -24 -24 -241 -242 -242 -244 -247 -248 -249 -252 -255 -255 -256 -256 -257 -258 -26 -26 -260 -262 -263 -265 -265 -266 -27 -272 -272 -273 -273 -273 -274 -275 -277 -277 -277 -277 -278 -278 -28 -280 -280 -281 -281 -282 -282 -283 -284 -285 -286 -287 -288 -288 -289 -291 -292 -296 -298 -298 -298 -30 -302 -305 -306 -307 -307 -308 -309 -309 -310 -311 -311 -311 -315 -316 -316 -316 -317 -317 -318 -318 -318 -321 -321 -322 -322 -323 -325 -325 -327 -327 -327 -33 -331 -331 -332 -333 -333 -335 -336 -338 -339 -34 -341 -342 -342 -344 -344 -345 -348 -348 -348 -348 -348 -35 -35 -35 -351 -353 -353 -356 -360 -362 -364 -365 -366 -367 -367 -368 -369 -369 -369 -37 -37 -373 -374 -375 -377 -378 -379 -382 -382 -384 -384 -384 -386 -389 -392 -393 -394 -395 -395 -396 -396 -396 -397 -397 -399 -399 -4 -400 -401 -401 -401 -401 -401 -402 -403 -403 -403 -404 -404 -406 -406 -406 -406 -407 -409 -409 -409 -41 -411 -413 -413 -414 -414 -417 -417 -417 -418 -419 -42 -42 -421 -424 -424 -427 -429 -429 -43 -430 -430 -430 -431 -431 -431 -432 -435 -436 -437 -438 -438 -438 -439 -439 -44 -443 -444 -446 -448 -449 -452 -453 -454 -454 -454 -455 -457 -458 -458 -459 -459 -460 -462 -462 -463 -463 -466 -466 -466 -467 -468 -468 -468 -468 -469 -469 -469 -469 -469 -47 -470 -472 -475 -477 -478 -478 -479 -480 -480 -480 -481 -482 -483 -484 -485 -487 -489 -489 -489 -489 -490 -491 -492 -492 -493 -494 -495 -496 -497 -498 -498 -498 -5 -5 -5 -51 -51 -53 -54 -57 -58 -58 -64 -65 -66 -67 -67 -69 -70 -70 -70 -72 -72 -74 -76 -76 -77 -78 -8 -80 -82 -83 -83 -84 -84 -85 -86 -87 -9 -90 -90 -90 -92 -95 -95 -96 -97 -97 -98 -98 -PREHOOK: query: EXPLAIN -SELECT distinct key FROM src ORDER BY 1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT distinct key FROM src ORDER BY 1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT distinct key FROM src ORDER BY 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT distinct key FROM src ORDER BY 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 -10 -100 -103 -104 -105 -11 -111 -113 -114 -116 -118 -119 -12 -120 -125 -126 -128 -129 -131 -133 -134 -136 -137 -138 -143 -145 -146 -149 -15 -150 -152 -153 -155 -156 -157 -158 -160 -162 -163 -164 -165 -166 -167 -168 -169 -17 -170 -172 -174 -175 -176 -177 -178 -179 -18 -180 -181 -183 -186 -187 -189 -19 -190 -191 -192 -193 -194 -195 -196 -197 -199 -2 -20 -200 -201 -202 -203 -205 -207 -208 -209 -213 -214 -216 -217 -218 -219 -221 -222 -223 -224 -226 -228 -229 -230 -233 -235 -237 -238 -239 -24 -241 -242 -244 -247 -248 -249 -252 -255 -256 -257 -258 -26 -260 -262 -263 -265 -266 -27 -272 -273 -274 -275 -277 -278 -28 -280 -281 -282 -283 -284 -285 -286 -287 -288 -289 -291 -292 -296 -298 -30 -302 -305 -306 -307 -308 -309 -310 -311 -315 -316 -317 -318 -321 -322 -323 -325 -327 -33 -331 -332 -333 -335 -336 -338 -339 -34 -341 -342 -344 -345 -348 -35 -351 -353 -356 -360 -362 -364 -365 -366 -367 -368 -369 -37 -373 -374 -375 -377 -378 -379 -382 -384 -386 -389 -392 -393 -394 -395 -396 -397 -399 -4 -400 -401 -402 -403 -404 -406 -407 -409 -41 -411 -413 -414 -417 -418 -419 -42 -421 -424 -427 -429 -43 -430 -431 -432 -435 -436 -437 -438 -439 -44 -443 -444 -446 -448 -449 -452 -453 -454 -455 -457 -458 -459 -460 -462 -463 -466 -467 -468 -469 -47 -470 -472 -475 -477 -478 -479 -480 -481 -482 -483 -484 -485 -487 -489 -490 -491 -492 -493 -494 -495 -496 -497 -498 -5 -51 -53 -54 -57 -58 -64 -65 -66 -67 -69 -70 -72 -74 -76 -77 -78 -8 -80 -82 -83 -84 -85 -86 -87 -9 -90 -92 -95 -96 -97 -98 diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out index 6a4bea1bd4..1f09dd3127 100644 --- a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -2263,71 +2263,92 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Reducer 5 (PARTITION-LEVEL SORT) -Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT), Reducer 7 (PARTITION-LEVEL SORT) -Reducer 5 <- Map 4 (GROUP) -Reducer 7 <- Map 6 (GROUP) +Reducer 10 <- Map 9 (GROUP) +Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Reducer 6 (PARTITION-LEVEL SORT) +Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT), Reducer 2 (PARTITION-LEVEL SORT) +Reducer 5 <- Map 4 (PARTITION-LEVEL SORT), Reducer 8 (PARTITION-LEVEL SORT) +Reducer 6 <- Reducer 5 (GROUP) +Reducer 8 <- Map 7 (GROUP) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 3 - File Output Operator [FS_26] - Select Operator [SEL_25] (rows=13 width=223) + File Output Operator [FS_36] + Select Operator [SEL_35] (rows=13 width=223) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_24] (rows=13 width=231) + Filter Operator [FIL_34] (rows=13 width=231) predicate:(not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) - Join Operator [JOIN_23] (rows=26 width=230) + Join Operator [JOIN_33] (rows=26 width=230) Output:["_col0","_col1","_col2","_col4","_col5","_col8"],condition map:[{"":"{\"type\":\"Left Outer\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0, _col1","1":"_col0, _col1"} + <-Reducer 10 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_32] + PartitionCols:_col0, _col1 + Select Operator [SEL_27] (rows=4 width=223) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_26] (rows=4 width=219) + predicate:_col0 is not null + Group By Operator [GBY_24] (rows=4 width=219) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 9 [GROUP] + GROUP [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=4 width=219) + Output:["_col0","_col1"],keys:p_name, p_mfgr + Select Operator [SEL_21] (rows=8 width=223) + Output:["p_name","p_mfgr"] + Filter Operator [FIL_38] (rows=8 width=223) + predicate:((p_size < 10) and p_mfgr is not null) + TableScan [TS_19] (rows=26 width=223) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] <-Reducer 2 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_21] + PARTITION-LEVEL SORT [RS_31] PartitionCols:_col0, _col1 - Join Operator [JOIN_20] (rows=26 width=229) + Join Operator [JOIN_30] (rows=26 width=229) Output:["_col0","_col1","_col2","_col4","_col5"],condition map:[{"":"{\"type\":\"Left Outer\",\"left\":0,\"right\":1}"}],keys:{"0":"_col1","1":"_col0"} <-Map 1 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_18] + PARTITION-LEVEL SORT [RS_28] PartitionCols:_col1 Select Operator [SEL_1] (rows=26 width=223) Output:["_col0","_col1","_col2"] TableScan [TS_0] (rows=26 width=223) default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] - <-Reducer 5 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_19] + <-Reducer 6 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_29] PartitionCols:_col0 - Group By Operator [GBY_7] (rows=2 width=114) + Group By Operator [GBY_17] (rows=2 width=114) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 4 [GROUP] - GROUP [RS_6] + <-Reducer 5 [GROUP] + GROUP [RS_16] PartitionCols:_col0 - Group By Operator [GBY_5] (rows=2 width=114) - Output:["_col0","_col1","_col2"],aggregations:["count()","count(p_name)"],keys:p_mfgr - Select Operator [SEL_4] (rows=8 width=223) - Output:["p_name","p_mfgr"] - Filter Operator [FIL_27] (rows=8 
width=223) - predicate:((p_size < 10) and p_mfgr is not null) - TableScan [TS_2] (rows=26 width=223) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] - <-Reducer 7 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_22] - PartitionCols:_col0, _col1 - Select Operator [SEL_17] (rows=4 width=223) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_16] (rows=4 width=219) - predicate:_col0 is not null - Group By Operator [GBY_14] (rows=4 width=219) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Map 6 [GROUP] - GROUP [RS_13] - PartitionCols:_col0, _col1 - Group By Operator [GBY_12] (rows=4 width=219) - Output:["_col0","_col1"],keys:p_name, p_mfgr - Select Operator [SEL_11] (rows=8 width=223) - Output:["p_name","p_mfgr"] - Filter Operator [FIL_28] (rows=8 width=223) - predicate:((p_size < 10) and p_mfgr is not null) - TableScan [TS_9] (rows=26 width=223) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] + Group By Operator [GBY_15] (rows=2 width=114) + Output:["_col0","_col1","_col2"],aggregations:["count()","count(_col1)"],keys:_col2 + Join Operator [JOIN_13] (rows=8 width=219) + Output:["_col1","_col2"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"} + <-Map 4 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_11] + PartitionCols:_col0 + Select Operator [SEL_4] (rows=8 width=219) + Output:["_col0","_col1"] + Filter Operator [FIL_37] (rows=8 width=223) + predicate:(p_size < 10) + TableScan [TS_2] (rows=26 width=223) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] + <-Reducer 8 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_12] + PartitionCols:_col0 + Group By Operator [GBY_9] (rows=5 width=98) + Output:["_col0"],keys:KEY._col0 + <-Map 7 [GROUP] + GROUP [RS_8] + PartitionCols:_col0 + Group By Operator [GBY_7] (rows=5 width=98) + Output:["_col0"],keys:p_mfgr + Select Operator [SEL_6] (rows=26 width=98) + Output:["p_mfgr"] + TableScan [TS_5] (rows=26 width=98) + default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr"] PREHOOK: query: explain select p_name, p_size from @@ -2427,88 +2448,126 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 10 <- Map 6 (GROUP) +Reducer 11 <- Map 1 (GROUP) +Reducer 12 <- Reducer 11 (GROUP) +Reducer 14 <- Map 13 (GROUP) +Reducer 15 <- Reducer 14 (PARTITION-LEVEL SORT), Reducer 18 (PARTITION-LEVEL SORT) +Reducer 18 <- Reducer 11 (GROUP) Reducer 2 <- Map 1 (GROUP) -Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT), Reducer 8 (PARTITION-LEVEL SORT) -Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT), Reducer 3 (PARTITION-LEVEL SORT) +Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT), Reducer 9 (PARTITION-LEVEL SORT) +Reducer 4 <- Reducer 15 (PARTITION-LEVEL SORT), Reducer 3 (PARTITION-LEVEL SORT) Reducer 5 <- Reducer 4 (SORT) -Reducer 7 <- Map 6 (GROUP) -Reducer 8 <- Reducer 7 (GROUP) +Reducer 7 <- Map 13 (GROUP) +Reducer 8 <- Reducer 12 (PARTITION-LEVEL SORT), Reducer 7 (PARTITION-LEVEL SORT) +Reducer 9 <- Reducer 8 (GROUP) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 5 - File Output Operator [FS_37] - Select Operator [SEL_36] (rows=3 width=106) + File Output Operator [FS_66] + Select Operator [SEL_65] (rows=3 width=106) Output:["_col0","_col1"] <-Reducer 4 [SORT] - SORT [RS_35] - Select Operator [SEL_34] (rows=3 width=106) + SORT [RS_64] + Select Operator [SEL_63] (rows=3 width=106) Output:["_col0","_col1"] - Filter Operator [FIL_33] (rows=3 width=119) + Filter Operator [FIL_62] (rows=3 width=119) predicate:(not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) - Join Operator [JOIN_32] (rows=5 width=114) + Join Operator [JOIN_61] (rows=5 width=114) Output:["_col0","_col1","_col3","_col4","_col7"],condition map:[{"":"{\"type\":\"Left Outer\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0, _col1","1":"_col0, _col1"} - <-Reducer 10 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_31] + <-Reducer 15 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_60] PartitionCols:_col0, _col1 - Select Operator [SEL_26] (rows=1 width=110) + Select Operator [SEL_55] (rows=1 width=110) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_25] (rows=1 width=110) + Filter Operator [FIL_54] (rows=1 width=110) predicate:_col0 is not null - Select Operator [SEL_24] (rows=1 width=110) + Select Operator [SEL_53] (rows=1 width=110) Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=1 width=114) - predicate:(((_col2 - _col1) > 600.0) and _col1 is not null) - Group By Operator [GBY_22] (rows=5 width=114) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 - <-Map 6 [GROUP] - GROUP [RS_9] - PartitionCols:_col0 - Group By Operator [GBY_8] (rows=5 width=114) - Output:["_col0","_col1","_col2"],aggregations:["min(p_retailprice)","max(p_retailprice)"],keys:p_mfgr - Select Operator [SEL_7] (rows=26 width=106) - Output:["p_mfgr","p_retailprice"] - TableScan [TS_6] (rows=26 width=106) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] + Join Operator [JOIN_52] (rows=1 width=106) + Output:["_col0","_col3"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col1","1":"_col0"} + <-Reducer 14 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_50] + PartitionCols:_col1 + Select Operator [SEL_38] (rows=1 width=114) + Output:["_col0","_col1"] + Filter Operator [FIL_37] (rows=1 width=114) + predicate:((_col2 - _col1) > 600.0) + Group By Operator [GBY_36] (rows=5 width=114) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 + <-Map 13 [GROUP] + GROUP [RS_35] + PartitionCols:_col0 + Group By Operator [GBY_34] (rows=5 width=114) + Output:["_col0","_col1","_col2"],aggregations:["min(p_retailprice)","max(p_retailprice)"],keys:p_mfgr + Select Operator [SEL_33] (rows=26 width=106) + Output:["p_mfgr","p_retailprice"] + TableScan [TS_32] (rows=26 width=106) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] + <-Reducer 18 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_51] + PartitionCols:_col0 + Group By Operator [GBY_48] (rows=2 width=8) + Output:["_col0"],keys:KEY._col0 + <-Reducer 11 [GROUP] + GROUP [RS_21] + PartitionCols:_col0 + Group By Operator [GBY_20] (rows=2 width=8) + Output:["_col0"],keys:_col1 + Select Operator [SEL_18] (rows=5 width=106) + Output:["_col1"] + Group By Operator [GBY_17] (rows=5 width=106) + Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0 + <-Map 1 [GROUP] + GROUP [RS_3] + PartitionCols:_col0 + Group By Operator [GBY_2] (rows=5 width=106) + Output:["_col0","_col1"],aggregations:["min(p_retailprice)"],keys:p_mfgr + Select Operator [SEL_1] (rows=26 width=106) + Output:["p_mfgr","p_retailprice"] + TableScan [TS_0] (rows=26 width=106) + default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] <-Reducer 3 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_30] + PARTITION-LEVEL SORT [RS_59] PartitionCols:_col0, _col1 - Join Operator [JOIN_29] (rows=5 width=112) + Join Operator [JOIN_58] (rows=5 width=112) Output:["_col0","_col1","_col3","_col4"],condition map:[{"":"{\"type\":\"Left Outer\",\"left\":0,\"right\":1}"}],keys:{"0":"_col1","1":"_col0"} <-Reducer 2 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_27] + PARTITION-LEVEL SORT [RS_56] PartitionCols:_col1 Group By Operator [GBY_4] (rows=5 width=106) Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0 - <-Map 1 [GROUP] - GROUP [RS_3] - PartitionCols:_col0 - Group By Operator [GBY_2] (rows=5 width=106) - Output:["_col0","_col1"],aggregations:["min(p_retailprice)"],keys:p_mfgr - Select Operator [SEL_1] (rows=26 width=106) - Output:["p_mfgr","p_retailprice"] - TableScan [TS_0] (rows=26 width=106) - default@part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_retailprice"] - <-Reducer 8 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_28] + <- Please refer to the previous Map 1 [GROUP] + <-Reducer 9 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_57] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=1 width=24) + Group By Operator [GBY_30] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Reducer 7 [GROUP] - GROUP [RS_15] + <-Reducer 8 [GROUP] + GROUP [RS_29] PartitionCols:_col0 - Group By Operator [GBY_14] (rows=1 width=24) - Output:["_col0","_col1","_col2"],aggregations:["count()","count(_col0)"],keys:_col1 - Select Operator [SEL_12] (rows=1 width=114) - Output:["_col0","_col1"] - Filter Operator [FIL_11] (rows=1 width=114) - predicate:(((_col2 - _col1) > 600.0) and _col1 is not null) - Group By Operator [GBY_10] (rows=5 width=114) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 - <- Please refer to the previous Map 6 [GROUP] + Group By Operator [GBY_28] (rows=1 width=24) + Output:["_col0","_col1","_col2"],aggregations:["count()","count(_col0)"],keys:_col3 + Join Operator [JOIN_26] (rows=1 width=106) + 
Output:["_col0","_col3"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col1","1":"_col0"} + <-Reducer 12 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_25] + PartitionCols:_col0 + Group By Operator [GBY_22] (rows=2 width=8) + Output:["_col0"],keys:KEY._col0 + <- Please refer to the previous Reducer 11 [GROUP] + <-Reducer 7 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_24] + PartitionCols:_col1 + Select Operator [SEL_12] (rows=1 width=114) + Output:["_col0","_col1"] + Filter Operator [FIL_11] (rows=1 width=114) + predicate:((_col2 - _col1) > 600.0) + Group By Operator [GBY_10] (rows=5 width=114) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 + <- Please refer to the previous Map 13 [GROUP] PREHOOK: query: explain select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from cbo_t1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out index fb13fb73e9..9d9957bbd9 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out @@ -1287,7 +1287,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - residual filter predicates: {(_col1 <> _col3)} + residual filter predicates: {(_col3 <> _col1)} Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int) diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out index e19240b7ca..aada175d5e 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -844,7 +844,6 @@ POSTHOOK: Input: default@src 97 val_97 98 val_98 98 val_98 -Warning: Shuffle Join JOIN[15][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product PREHOOK: query: explain select * from src b where b.key in (select distinct key from src a where a.value > b.value) PREHOOK: type: QUERY POSTHOOK: query: explain select * from src b where b.key in (select distinct key from src a where a.value > b.value) @@ -858,8 +857,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (GROUP, 2) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -868,50 +866,35 @@ STAGE PLANS: alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string) + 
sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Map 5 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: value (type: string) + keys: key (type: string), value (type: string) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -919,55 +902,40 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 45832 Data size: 1019683 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 45832 Data size: 1019683 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + residual filter predicates: {(_col3 > _col1)} + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 125000 Data size: 2781000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > _col2) (type: boolean) - 
Statistics: Num rows: 41666 Data size: 926985 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 41666 Data size: 926985 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 41666 Data size: 926985 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41666 Data size: 926985 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -975,7 +943,6 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[15][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product PREHOOK: query: select * from src b where b.key in (select distinct key from src a where a.value > b.value) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -984,503 +951,6 @@ POSTHOOK: query: select * from src b where b.key in (select distinct key from sr POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -10 val_10 -100 val_100 -100 val_100 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -105 val_105 -11 val_11 -111 val_111 -113 val_113 -113 val_113 -114 val_114 -116 val_116 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -12 val_12 -12 val_12 -120 val_120 -120 val_120 -125 val_125 -125 val_125 -126 val_126 -128 val_128 -128 val_128 -128 val_128 -129 val_129 -129 val_129 -131 val_131 -133 val_133 -134 val_134 -134 val_134 -136 val_136 -137 val_137 -137 val_137 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -143 val_143 -145 val_145 -146 val_146 -146 val_146 -149 val_149 -149 val_149 -15 val_15 -15 val_15 -150 val_150 -152 val_152 -152 val_152 -153 val_153 -155 val_155 -156 val_156 -157 val_157 -158 val_158 -160 val_160 -162 val_162 -163 val_163 -164 val_164 -164 val_164 -165 val_165 -165 val_165 -166 val_166 -167 val_167 -167 val_167 -167 val_167 -168 val_168 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -17 val_17 -170 val_170 -172 val_172 -172 val_172 -174 val_174 -174 val_174 -175 val_175 -175 val_175 -176 val_176 -176 val_176 -177 val_177 -178 val_178 -179 val_179 -179 val_179 -18 val_18 -18 val_18 -180 val_180 -181 val_181 -183 val_183 -186 val_186 -187 val_187 -187 val_187 -187 val_187 -189 val_189 -19 val_19 -190 val_190 -191 val_191 -191 val_191 -192 val_192 -193 val_193 -193 val_193 -193 val_193 -194 val_194 -195 val_195 -195 val_195 -196 
val_196 -197 val_197 -197 val_197 -199 val_199 -199 val_199 -199 val_199 -2 val_2 -20 val_20 -200 val_200 -200 val_200 -201 val_201 -202 val_202 -203 val_203 -203 val_203 -205 val_205 -205 val_205 -207 val_207 -207 val_207 -208 val_208 -208 val_208 -208 val_208 -209 val_209 -209 val_209 -213 val_213 -213 val_213 -214 val_214 -216 val_216 -216 val_216 -217 val_217 -217 val_217 -218 val_218 -219 val_219 -219 val_219 -221 val_221 -221 val_221 -222 val_222 -223 val_223 -223 val_223 -224 val_224 -224 val_224 -226 val_226 -228 val_228 -229 val_229 -229 val_229 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -230 val_230 -233 val_233 -233 val_233 -235 val_235 -237 val_237 -237 val_237 -238 val_238 -238 val_238 -239 val_239 -239 val_239 -24 val_24 -24 val_24 -241 val_241 -242 val_242 -242 val_242 -244 val_244 -247 val_247 -248 val_248 -249 val_249 -252 val_252 -255 val_255 -255 val_255 -256 val_256 -256 val_256 -257 val_257 -258 val_258 -26 val_26 -26 val_26 -260 val_260 -262 val_262 -263 val_263 -265 val_265 -265 val_265 -266 val_266 -27 val_27 -272 val_272 -272 val_272 -273 val_273 -273 val_273 -273 val_273 -274 val_274 -275 val_275 -277 val_277 -277 val_277 -277 val_277 -277 val_277 -278 val_278 -278 val_278 -28 val_28 -280 val_280 -280 val_280 -281 val_281 -281 val_281 -282 val_282 -282 val_282 -283 val_283 -284 val_284 -285 val_285 -286 val_286 -287 val_287 -288 val_288 -288 val_288 -289 val_289 -291 val_291 -292 val_292 -296 val_296 -298 val_298 -298 val_298 -298 val_298 -30 val_30 -302 val_302 -305 val_305 -306 val_306 -307 val_307 -307 val_307 -308 val_308 -309 val_309 -309 val_309 -310 val_310 -311 val_311 -311 val_311 -311 val_311 -315 val_315 -316 val_316 -316 val_316 -316 val_316 -317 val_317 -317 val_317 -318 val_318 -318 val_318 -318 val_318 -321 val_321 -321 val_321 -322 val_322 -322 val_322 -323 val_323 -325 val_325 -325 val_325 -327 val_327 -327 val_327 -327 val_327 -33 val_33 -331 val_331 -331 val_331 -332 val_332 -333 val_333 -333 val_333 -335 val_335 -336 val_336 -338 val_338 -339 val_339 -34 val_34 -341 val_341 -342 val_342 -342 val_342 -344 val_344 -344 val_344 -345 val_345 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -348 val_348 -35 val_35 -35 val_35 -35 val_35 -351 val_351 -353 val_353 -353 val_353 -356 val_356 -360 val_360 -362 val_362 -364 val_364 -365 val_365 -366 val_366 -367 val_367 -367 val_367 -368 val_368 -369 val_369 -369 val_369 -369 val_369 -37 val_37 -37 val_37 -373 val_373 -374 val_374 -375 val_375 -377 val_377 -378 val_378 -379 val_379 -382 val_382 -382 val_382 -384 val_384 -384 val_384 -384 val_384 -386 val_386 -389 val_389 -392 val_392 -393 val_393 -394 val_394 -395 val_395 -395 val_395 -396 val_396 -396 val_396 -396 val_396 -397 val_397 -397 val_397 -399 val_399 -399 val_399 -4 val_4 -400 val_400 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -402 val_402 -403 val_403 -403 val_403 -403 val_403 -404 val_404 -404 val_404 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -407 val_407 -409 val_409 -409 val_409 -409 val_409 -41 val_41 -411 val_411 -413 val_413 -413 val_413 -414 val_414 -414 val_414 -417 val_417 -417 val_417 -417 val_417 -418 val_418 -419 val_419 -42 val_42 -42 val_42 -421 val_421 -424 val_424 -424 val_424 -427 val_427 -429 val_429 -429 val_429 -43 val_43 -430 val_430 -430 val_430 -430 val_430 -431 val_431 -431 val_431 -431 val_431 -432 val_432 -435 val_435 -436 val_436 -437 val_437 -438 val_438 -438 val_438 -438 val_438 -439 val_439 -439 val_439 -44 val_44 -443 val_443 -444 val_444 -446 val_446 -448 val_448 -449 
val_449 -452 val_452 -453 val_453 -454 val_454 -454 val_454 -454 val_454 -455 val_455 -457 val_457 -458 val_458 -458 val_458 -459 val_459 -459 val_459 -460 val_460 -462 val_462 -462 val_462 -463 val_463 -463 val_463 -466 val_466 -466 val_466 -466 val_466 -467 val_467 -468 val_468 -468 val_468 -468 val_468 -468 val_468 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -469 val_469 -47 val_47 -470 val_470 -472 val_472 -475 val_475 -477 val_477 -478 val_478 -478 val_478 -479 val_479 -480 val_480 -480 val_480 -480 val_480 -481 val_481 -482 val_482 -483 val_483 -484 val_484 -485 val_485 -487 val_487 -489 val_489 -489 val_489 -489 val_489 -489 val_489 -490 val_490 -491 val_491 -492 val_492 -492 val_492 -493 val_493 -494 val_494 -495 val_495 -496 val_496 -497 val_497 -498 val_498 -498 val_498 -498 val_498 -5 val_5 -5 val_5 -5 val_5 -51 val_51 -51 val_51 -53 val_53 -54 val_54 -57 val_57 -58 val_58 -58 val_58 -64 val_64 -65 val_65 -66 val_66 -67 val_67 -67 val_67 -69 val_69 -70 val_70 -70 val_70 -70 val_70 -72 val_72 -72 val_72 -74 val_74 -76 val_76 -76 val_76 -77 val_77 -78 val_78 -8 val_8 -80 val_80 -82 val_82 -83 val_83 -83 val_83 -84 val_84 -84 val_84 -85 val_85 -86 val_86 -87 val_87 -9 val_9 -90 val_90 -90 val_90 -90 val_90 -92 val_92 -95 val_95 -95 val_95 -96 val_96 -97 val_97 -97 val_97 -98 val_98 -98 val_98 PREHOOK: query: select p_mfgr, p_name, p_size from part where part.p_size in diff --git a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out index a4282df08a..edc1fb7f8c 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out @@ -234,8 +234,8 @@ POSTHOOK: Input: default@part_null 17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the 45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful 48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 78487 NULL Manufacturer#6 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null) diff --git a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out index 0d12d0db60..83b1b50e3d 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out @@ -6950,7 +6950,6 @@ PREHOOK: type: DROPTABLE POSTHOOK: query: drop table t1 POSTHOOK: type: DROPTABLE Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 5' is a cross product -Warning: Shuffle Join JOIN[31][tables = [$hdt$_2, $hdt$_3]] in Work 'Reducer 10' is a cross product PREHOOK: query: explain select * from src b where b.key not in @@ -6975,14 +6974,12 @@ STAGE PLANS: Stage: Stage-1 
Spark Edges: - Reducer 10 <- Map 9 (PARTITION-LEVEL SORT, 1), Reducer 13 (PARTITION-LEVEL SORT, 1) - Reducer 11 <- Reducer 10 (GROUP, 2) - Reducer 13 <- Map 12 (GROUP, 2) + Reducer 10 <- Map 9 (GROUP, 2) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) Reducer 6 <- Reducer 5 (GROUP, 2) - Reducer 8 <- Map 12 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -7000,7 +6997,23 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 12 + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Map 7 Map Operator Tree: TableScan alias: b @@ -7019,96 +7032,44 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) Map 9 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > '9') (type: boolean) + predicate: ((key > '9') and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 10 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 41500 Data size: 923146 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col2 > _col1) (type: boolean) - Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE 
Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), _col2 (type: string) + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE - Reducer 11 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 10 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6916 Data size: 153842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 6916 Data size: 153842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6916 Data size: 153842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6916 Data size: 153842 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: boolean) - Reducer 13 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean) Reducer 2 Reduce Operator Tree: Join Operator @@ -7120,20 +7081,21 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 7607 Data size: 169226 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 7607 Data size: 169226 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: bigint), _col4 (type: bigint) + value expressions: _col1 (type: string), _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 
keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4, _col7 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7 + residual filter predicates: {(_col1 > _col6)} Statistics: Num rows: 8367 Data size: 186148 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean) @@ -7211,7 +7173,6 @@ STAGE PLANS: ListSink Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 5' is a cross product -Warning: Shuffle Join JOIN[31][tables = [$hdt$_2, $hdt$_3]] in Work 'Reducer 10' is a cross product PREHOOK: query: select * from src b where b.key not in @@ -7232,67 +7193,85 @@ where b.key not in POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 10 val_10 +100 val_100 +100 val_100 103 val_103 103 val_103 -105 val_105 +104 val_104 +104 val_104 +111 val_111 +114 val_114 116 val_116 120 val_120 120 val_120 -125 val_125 -125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 129 val_129 129 val_129 131 val_131 +133 val_133 134 val_134 134 val_134 -136 val_136 -143 val_143 145 val_145 +146 val_146 +146 val_146 149 val_149 149 val_149 -150 val_150 +15 val_15 +15 val_15 +152 val_152 +152 val_152 155 val_155 157 val_157 -158 val_158 -160 val_160 -163 val_163 -164 val_164 -164 val_164 -166 val_166 +169 val_169 +169 val_169 +169 val_169 +169 val_169 17 val_17 170 val_170 172 val_172 172 val_172 -180 val_180 -183 val_183 +178 val_178 +179 val_179 +179 val_179 +181 val_181 +187 val_187 +187 val_187 +187 val_187 189 val_189 19 val_19 -190 val_190 191 val_191 191 val_191 -193 val_193 -193 val_193 -193 val_193 +192 val_192 +194 val_194 195 val_195 195 val_195 -196 val_196 +199 val_199 +199 val_199 +199 val_199 +2 val_2 20 val_20 -205 val_205 -205 val_205 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 207 val_207 207 val_207 -209 val_209 -209 val_209 +208 val_208 +208 val_208 +208 val_208 213 val_213 213 val_213 +214 val_214 216 val_216 216 val_216 -217 val_217 -217 val_217 +218 val_218 +222 val_222 223 val_223 223 val_223 224 val_224 @@ -7303,79 +7282,78 @@ POSTHOOK: Input: default@src 233 val_233 233 val_233 235 val_235 -238 val_238 -238 val_238 +237 val_237 +237 val_237 239 val_239 239 val_239 24 val_24 24 val_24 241 val_241 244 val_244 -247 val_247 -248 val_248 255 val_255 255 val_255 +256 val_256 +256 val_256 +257 val_257 258 val_258 26 val_26 26 val_26 -260 val_260 -263 val_263 +262 val_262 265 val_265 265 val_265 266 val_266 -272 val_272 -272 val_272 +27 val_27 273 val_273 273 val_273 273 val_273 -274 val_274 +275 val_275 +278 val_278 +278 val_278 28 val_28 -281 val_281 -281 val_281 +280 val_280 +280 val_280 +282 val_282 +282 val_282 +284 val_284 +285 val_285 286 val_286 +287 val_287 +288 val_288 +288 val_288 291 val_291 -296 val_296 +292 val_292 298 val_298 298 val_298 298 val_298 30 val_30 302 val_302 305 val_305 -306 val_306 -307 val_307 -307 val_307 -308 val_308 -309 val_309 -309 val_309 -315 val_315 +310 val_310 +311 val_311 +311 val_311 +311 val_311 316 val_316 316 val_316 316 val_316 -317 val_317 -317 val_317 -318 val_318 -318 val_318 -318 val_318 321 val_321 321 val_321 -325 val_325 -325 val_325 +323 val_323 33 
val_33 -331 val_331 -331 val_331 332 val_332 +333 val_333 +333 val_333 335 val_335 +336 val_336 +338 val_338 339 val_339 -342 val_342 -342 val_342 -345 val_345 +344 val_344 +344 val_344 353 val_353 353 val_353 356 val_356 360 val_360 -366 val_366 -367 val_367 -367 val_367 +362 val_362 +364 val_364 368 val_368 369 val_369 369 val_369 @@ -7383,124 +7361,101 @@ POSTHOOK: Input: default@src 37 val_37 37 val_37 373 val_373 -375 val_375 377 val_377 378 val_378 -379 val_379 -382 val_382 -382 val_382 384 val_384 384 val_384 384 val_384 386 val_386 -389 val_389 -394 val_394 +392 val_392 395 val_395 395 val_395 396 val_396 396 val_396 396 val_396 -399 val_399 -399 val_399 -400 val_400 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 +397 val_397 +397 val_397 402 val_402 -406 val_406 -406 val_406 -406 val_406 -406 val_406 +404 val_404 +404 val_404 407 val_407 -41 val_41 -413 val_413 -413 val_413 -414 val_414 -414 val_414 +411 val_411 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +419 val_419 42 val_42 42 val_42 +424 val_424 +424 val_424 +43 val_43 430 val_430 430 val_430 430 val_430 -431 val_431 -431 val_431 -431 val_431 +432 val_432 436 val_436 -44 val_44 -446 val_446 +437 val_437 +444 val_444 448 val_448 449 val_449 -452 val_452 +453 val_453 +454 val_454 +454 val_454 +454 val_454 +457 val_457 459 val_459 459 val_459 -462 val_462 -462 val_462 -466 val_466 -466 val_466 -466 val_466 -467 val_467 468 val_468 468 val_468 468 val_468 468 val_468 47 val_47 -472 val_472 -480 val_480 -480 val_480 -480 val_480 +477 val_477 +479 val_479 +481 val_481 482 val_482 -484 val_484 +483 val_483 485 val_485 -487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 490 val_490 +492 val_492 +492 val_492 493 val_493 494 val_494 495 val_495 496 val_496 497 val_497 -498 val_498 -498 val_498 -498 val_498 -5 val_5 -5 val_5 -5 val_5 -51 val_51 -51 val_51 -54 val_54 57 val_57 -58 val_58 -58 val_58 65 val_65 -66 val_66 -69 val_69 -70 val_70 -70 val_70 -70 val_70 +67 val_67 +67 val_67 +72 val_72 +72 val_72 74 val_74 -77 val_77 -78 val_78 +76 val_76 +76 val_76 8 val_8 80 val_80 -84 val_84 -84 val_84 +82 val_82 85 val_85 +86 val_86 87 val_87 9 val_9 92 val_92 -95 val_95 -95 val_95 96 val_96 -100 val_100 -100 val_100 -104 val_104 -104 val_104 +97 val_97 +97 val_97 +0 val_0 +0 val_0 +0 val_0 +105 val_105 11 val_11 -111 val_111 113 val_113 113 val_113 -114 val_114 118 val_118 118 val_118 119 val_119 @@ -7508,36 +7463,32 @@ POSTHOOK: Input: default@src 119 val_119 12 val_12 12 val_12 -126 val_126 -128 val_128 -128 val_128 -128 val_128 -133 val_133 +125 val_125 +125 val_125 +136 val_136 137 val_137 137 val_137 138 val_138 138 val_138 138 val_138 138 val_138 -146 val_146 -146 val_146 -15 val_15 -15 val_15 -152 val_152 -152 val_152 +143 val_143 +150 val_150 153 val_153 156 val_156 +158 val_158 +160 val_160 162 val_162 +163 val_163 +164 val_164 +164 val_164 165 val_165 165 val_165 +166 val_166 167 val_167 167 val_167 167 val_167 168 val_168 -169 val_169 -169 val_169 -169 val_169 -169 val_169 174 val_174 174 val_174 175 val_175 @@ -7545,94 +7496,82 @@ POSTHOOK: Input: default@src 176 val_176 176 val_176 177 val_177 -178 val_178 -179 val_179 -179 val_179 18 val_18 18 val_18 -181 val_181 +180 val_180 +183 val_183 186 val_186 -187 val_187 -187 val_187 -187 val_187 -192 val_192 -194 val_194 +190 val_190 +193 val_193 +193 val_193 +193 val_193 +196 val_196 197 val_197 197 val_197 -199 val_199 -199 val_199 -199 val_199 -2 val_2 -200 val_200 -200 val_200 -201 val_201 -202 val_202 -203 val_203 -203 val_203 -208 val_208 
-208 val_208 -208 val_208 -214 val_214 -218 val_218 +205 val_205 +205 val_205 +209 val_209 +209 val_209 +217 val_217 +217 val_217 219 val_219 219 val_219 221 val_221 221 val_221 -222 val_222 226 val_226 230 val_230 230 val_230 230 val_230 230 val_230 230 val_230 -237 val_237 -237 val_237 +238 val_238 +238 val_238 242 val_242 242 val_242 +247 val_247 +248 val_248 249 val_249 252 val_252 -256 val_256 -256 val_256 -257 val_257 -262 val_262 -27 val_27 -275 val_275 +260 val_260 +263 val_263 +272 val_272 +272 val_272 +274 val_274 277 val_277 277 val_277 277 val_277 277 val_277 -278 val_278 -278 val_278 -280 val_280 -280 val_280 -282 val_282 -282 val_282 +281 val_281 +281 val_281 283 val_283 -284 val_284 -285 val_285 -287 val_287 -288 val_288 -288 val_288 289 val_289 -292 val_292 -310 val_310 -311 val_311 -311 val_311 -311 val_311 +296 val_296 +306 val_306 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +315 val_315 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 322 val_322 322 val_322 -323 val_323 +325 val_325 +325 val_325 327 val_327 327 val_327 327 val_327 -333 val_333 -333 val_333 -336 val_336 -338 val_338 +331 val_331 +331 val_331 34 val_34 341 val_341 -344 val_344 -344 val_344 +342 val_342 +342 val_342 +345 val_345 348 val_348 348 val_348 348 val_348 @@ -7642,93 +7581,115 @@ POSTHOOK: Input: default@src 35 val_35 35 val_35 351 val_351 -362 val_362 -364 val_364 365 val_365 +366 val_366 +367 val_367 +367 val_367 374 val_374 -392 val_392 +375 val_375 +379 val_379 +382 val_382 +382 val_382 +389 val_389 393 val_393 -397 val_397 -397 val_397 +394 val_394 +399 val_399 +399 val_399 4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 403 val_403 403 val_403 403 val_403 -404 val_404 -404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 409 val_409 409 val_409 409 val_409 -411 val_411 -417 val_417 -417 val_417 -417 val_417 -418 val_418 -419 val_419 +41 val_41 +413 val_413 +413 val_413 +414 val_414 +414 val_414 421 val_421 -424 val_424 -424 val_424 427 val_427 429 val_429 429 val_429 -43 val_43 -432 val_432 +431 val_431 +431 val_431 +431 val_431 435 val_435 -437 val_437 438 val_438 438 val_438 438 val_438 439 val_439 439 val_439 +44 val_44 443 val_443 -444 val_444 -453 val_453 -454 val_454 -454 val_454 -454 val_454 +446 val_446 +452 val_452 455 val_455 -457 val_457 458 val_458 458 val_458 460 val_460 +462 val_462 +462 val_462 463 val_463 463 val_463 +466 val_466 +466 val_466 +466 val_466 +467 val_467 469 val_469 469 val_469 469 val_469 469 val_469 469 val_469 470 val_470 +472 val_472 475 val_475 -477 val_477 478 val_478 478 val_478 -479 val_479 -481 val_481 -483 val_483 -489 val_489 -489 val_489 -489 val_489 -489 val_489 +480 val_480 +480 val_480 +480 val_480 +484 val_484 +487 val_487 491 val_491 -492 val_492 -492 val_492 +498 val_498 +498 val_498 +498 val_498 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 53 val_53 +54 val_54 +58 val_58 +58 val_58 64 val_64 -67 val_67 -67 val_67 -72 val_72 -72 val_72 -76 val_76 -76 val_76 -82 val_82 +66 val_66 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +77 val_77 +78 val_78 83 val_83 83 val_83 -86 val_86 +84 val_84 +84 val_84 90 val_90 90 val_90 90 val_90 -97 val_97 -97 val_97 +95 val_95 +95 val_95 98 val_98 98 val_98 diff --git a/ql/src/test/results/clientpositive/subquery_exists.q.out b/ql/src/test/results/clientpositive/subquery_exists.q.out index b6b31aaf47..898674be49 100644 --- a/ql/src/test/results/clientpositive/subquery_exists.q.out +++ 
b/ql/src/test/results/clientpositive/subquery_exists.q.out @@ -1269,7 +1269,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - residual filter predicates: {(_col1 <> _col3)} + residual filter predicates: {(_col3 <> _col1)} Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int) diff --git a/ql/src/test/results/clientpositive/subquery_notexists.q.out b/ql/src/test/results/clientpositive/subquery_notexists.q.out index a6175f8fec..dfe256f6d0 100644 --- a/ql/src/test/results/clientpositive/subquery_notexists.q.out +++ b/ql/src/test/results/clientpositive/subquery_notexists.q.out @@ -510,7 +510,7 @@ POSTHOOK: Input: default@src 199 val_199 199 val_199 2 val_2 -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from src b @@ -530,115 +530,40 @@ where not exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value > 'val_2') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (value > 'val_2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: Statistics: Num rows: 166 Data size: 1763 Basic 
stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 41500 Data size: 923146 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 > _col2) and (_col3 <> _col1)) (type: boolean) - Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string), _col3 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6916 Data size: 153842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6916 Data size: 153842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -657,36 +582,34 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6916 Data size: 153842 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: boolean) + sort order: + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: 
_col0, _col1, _col4 - Statistics: Num rows: 7607 Data size: 169226 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + residual filter predicates: {(_col2 > _col0)} {(_col1 <> _col3)} + Statistics: Num rows: 41500 Data size: 922896 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col4 is null (type: boolean) - Statistics: Num rows: 3803 Data size: 84601 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20750 Data size: 461448 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3803 Data size: 84601 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20750 Data size: 461448 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3803 Data size: 84601 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20750 Data size: 461448 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -698,7 +621,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from src b where not exists diff --git a/ql/src/test/results/clientpositive/udf_to_boolean.q.out b/ql/src/test/results/clientpositive/udf_to_boolean.q.out index bee030ab92..ebce364bf7 100644 --- a/ql/src/test/results/clientpositive/udf_to_boolean.q.out +++ b/ql/src/test/results/clientpositive/udf_to_boolean.q.out @@ -70,33 +70,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### true -PREHOOK: query: SELECT CAST(CAST('TRUE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(CAST('TRUE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -true -PREHOOK: query: SELECT CAST(CAST('true' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(CAST('true' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -true -PREHOOK: query: SELECT CAST(CAST('TrUe' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(CAST('TrUe' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -true PREHOOK: query: SELECT CAST(CAST('2011-05-06 07:08:09' as timestamp) AS BOOLEAN) FROM src tablesample (1 rows) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -178,33 +151,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### false -PREHOOK: query: SELECT CAST(CAST('FALSE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(CAST('FALSE' AS STRING) AS 
BOOLEAN) FROM src tablesample (1 rows) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -false -PREHOOK: query: SELECT CAST(CAST('false' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(CAST('false' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -false -PREHOOK: query: SELECT CAST(CAST('FaLsE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(CAST('FaLsE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -false PREHOOK: query: SELECT CAST(CAST(0 as timestamp) AS BOOLEAN) FROM src tablesample (1 rows) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/vector_empty_where.q.out b/ql/src/test/results/clientpositive/vector_empty_where.q.out index 6b2c7fefa2..609a36cb1a 100644 --- a/ql/src/test/results/clientpositive/vector_empty_where.q.out +++ b/ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -26,7 +26,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsTrue(col 13:boolean)(children: CastStringToBoolean(col 6) -> 13:boolean) + predicateExpression: SelectColumnIsTrue(col 14:boolean)(children: CastLongToBooleanViaLongToLong(col 13:bigint)(children: StringLength(col 6:string) -> 13:bigint) -> 14:boolean) predicate: cstring1 (type: string) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out b/ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out deleted file mode 100644 index 1b06698bca..0000000000 --- a/ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out +++ /dev/null @@ -1,156 +0,0 @@ -PREHOOK: query: create table t (s string) stored as orc -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t -POSTHOOK: query: create table t (s string) stored as orc -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t -PREHOOK: query: insert into t values ('false') -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values ('false') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: insert into t values ('FALSE') -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values ('FALSE') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: insert into t values ('FaLsE') -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values ('FaLsE') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: insert into t values ('true') -PREHOOK: type: QUERY 
-PREHOOK: Output: default@t -POSTHOOK: query: insert into t values ('true') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: insert into t values ('TRUE') -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values ('TRUE') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: insert into t values ('TrUe') -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values ('TrUe') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: insert into t values ('') -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values ('') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: insert into t values ('Other') -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values ('Other') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: insert into t values ('Off') -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values ('Off') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: insert into t values ('No') -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values ('No') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: insert into t values ('0') -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values ('0') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: insert into t values ('1') -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values ('1') -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain select s,cast(s as boolean) from t order by s -PREHOOK: type: QUERY -POSTHOOK: query: explain select s,cast(s as boolean) from t order by s -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), UDFToBoolean(s) (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column 
stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select s,cast(s as boolean) from t order by s -PREHOOK: type: QUERY -PREHOOK: Input: default@t -#### A masked pattern was here #### -POSTHOOK: query: select s,cast(s as boolean) from t order by s -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t -#### A masked pattern was here #### - false -0 false -1 true -FALSE false -FaLsE false -No false -Off false -Other true -TRUE true -TrUe true -false false -true true diff --git a/ql/src/test/results/clientpositive/vectorized_casts.q.out b/ql/src/test/results/clientpositive/vectorized_casts.q.out index f6f210533f..96a1770f76 100644 --- a/ql/src/test/results/clientpositive/vectorized_casts.q.out +++ b/ql/src/test/results/clientpositive/vectorized_casts.q.out @@ -179,8 +179,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 21, 0, 1, 2, 3, 22, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73] - selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastStringToBoolean(col 6) -> 21:boolean, CastDoubleToLong(col 4:float) -> 22:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 
39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 56:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, VectorUDFAdaptor(UDFToString(ctimestamp1)) -> 65:string, CastStringGroupToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: col 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double + projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 22, 0, 1, 2, 3, 21, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73] + selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastLongToBooleanViaLongToLong(col 21:bigint)(children: StringLength(col 6:string) -> 21:bigint) -> 22:boolean, CastDoubleToLong(col 4:float) -> 21:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 
0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 56:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, VectorUDFAdaptor(UDFToString(ctimestamp1)) -> 65:string, CastStringGroupToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: col 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java index 88e7fb00f4..6a4733f989 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java @@ -22,7 +22,6 @@ import java.io.DataOutput; import java.io.IOException; import java.nio.charset.CharacterCodingException; -import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; import java.time.DateTimeException; @@ -556,69 +555,19 @@ public static boolean getBoolean(Object o, 
      PrimitiveObjectInspector oi) {
   }
 
-  enum FalseValues {
-    FALSE("false"), OFF("off"), NO("no"), ZERO("0"), EMPTY("");
-
-    private final byte[] bytes;
-    private String str;
-
-    FalseValues(String s) {
-      str = s;
-      bytes = s.getBytes(StandardCharsets.UTF_8);
-    }
-
-    public boolean accept(byte[] arr, int st) {
-      for (int i = 0; i < bytes.length; i++) {
-        byte b = arr[i + st];
-        if (!(b == bytes[i] || b + 'a' - 'A' == bytes[i])) {
-          return false;
-        }
-      }
-      return true;
-    }
-
-    public boolean accept(String s) {
-      return str.equalsIgnoreCase(s);
-    }
-  }
-  /**
-   * Parses a boolean from string
-   *
-   * Accepts "false","off","no","0" and "" as FALSE
-   * All other values are interpreted as true.
-   */
-  public static boolean parseBoolean(byte[] arr, int st, int len) {
-    switch (len) {
-    case 5:
-      return !FalseValues.FALSE.accept(arr, st);
-    case 3:
-      return !FalseValues.OFF.accept(arr, st);
-    case 2:
-      return !FalseValues.NO.accept(arr, st);
-    case 1:
-      return !FalseValues.ZERO.accept(arr, st);
-    case 0:
-      return false;
-    default:
-      return true;
-    }
-  }
-
-  private static final FalseValues[] FALSE_BOOLEANS = FalseValues.values();
+  private static final String falseBooleans[] = { "false", "no", "off", "0", "" };
 
   private static boolean parseBoolean(String s) {
-    for (int i = 0; i < FALSE_BOOLEANS.length; i++) {
-      if (FALSE_BOOLEANS[i].accept(s)) {
+    for(int i=0;i<falseBooleans.length;i++) {
+      if(falseBooleans[i].equalsIgnoreCase(s)) {
         return false;
       }
     }
     return true;
   }
 
-    if (t.getLength() > 5) {
+    if(t.getLength()>5)
       return true;
-    }
     String strVal=t.toString();
     return parseBoolean(strVal);
   }
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java
index cbf19d0389..9d86a5494a 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java
@@ -231,16 +231,10 @@ public void testGetBoolean() {
     for (String falseStr : mustEvaluateToFalse) {
       assertFalse(falseStr,
           PrimitiveObjectInspectorUtils.getBoolean(falseStr, PrimitiveObjectInspectorFactory.javaStringObjectInspector));
-
-      byte[] b1 = ("asd"+falseStr).getBytes();
-      assertFalse(falseStr, PrimitiveObjectInspectorUtils.parseBoolean(b1, 3, falseStr.length()));
-
     }
     for (String trueStr : mustEvaluateToTrue) {
       assertTrue(trueStr,
           PrimitiveObjectInspectorUtils.getBoolean(trueStr, PrimitiveObjectInspectorFactory.javaStringObjectInspector));
-      byte[] b1 = ("asd"+trueStr).getBytes();
-      assertTrue(trueStr, PrimitiveObjectInspectorUtils.parseBoolean(b1, 3, trueStr.length()));
     }
   }
 }
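The serde hunks above restore Hive's original string-to-boolean rule: the literals "false", "no", "off", "0" and the empty string evaluate to FALSE, compared case-insensitively, and every other string evaluates to TRUE; the removed FalseValues enum and the parseBoolean(byte[], int, int) overload were a byte-level fast path for the same rule. Below is a minimal, self-contained sketch of that rule; the class and method names are illustrative only, not the actual Hive sources.

```java
// Illustrative sketch only (not the Hive class): the restored rule treats
// "false", "no", "off", "0" and the empty string as FALSE, ignoring case,
// and every other string as TRUE.
public class BooleanParseSketch {

  private static final String[] FALSE_LITERALS = { "false", "no", "off", "0", "" };

  static boolean parseBoolean(String s) {
    for (String falseLiteral : FALSE_LITERALS) {
      if (falseLiteral.equalsIgnoreCase(s)) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(parseBoolean("OFF"));    // false: matches "off" ignoring case
    System.out.println(parseBoolean(""));       // false: empty string is a false literal
    System.out.println(parseBoolean("maybe"));  // true: anything not in the list is true
    System.out.println(parseBoolean("truely")); // true: longer than 5 chars can never match a false literal
  }
}
```

The length-based switch in the removed byte[] overload was only a shortcut over the same rule: a candidate can only match a false literal of exactly its own length, which is also why the restored Text path above can short-circuit to TRUE when t.getLength() exceeds 5.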
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
index 13967d5186..b445723451 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
@@ -300,29 +300,21 @@ public void alterTable(RawStore msdb, Warehouse wh, String dbname,
         MetaStoreUtils.updateTableStatsFast(db, newt, wh, false, true, environmentContext);
       }
 
-      if (isPartitionedTable) {
+      if (cascade && isPartitionedTable) {
         //Currently only column related changes can be cascaded in alter table
         if(!MetaStoreUtils.areSameColumns(oldt.getSd().getCols(), newt.getSd().getCols())) {
           parts = msdb.getPartitions(dbname, name, -1);
           for (Partition part : parts) {
-            Partition oldPart = new Partition(part);
             List<FieldSchema> oldCols = part.getSd().getCols();
             part.getSd().setCols(newt.getSd().getCols());
             ColumnStatistics colStats = updateOrGetPartitionColumnStats(msdb, dbname, name,
                 part.getValues(), oldCols, oldt, part, null);
             assert(colStats == null);
-            if (cascade) {
-              msdb.alterPartition(dbname, name, part.getValues(), part);
-            } else {
-              // update changed properties (stats)
-              oldPart.setParameters(part.getParameters());
-              msdb.alterPartition(dbname, name, part.getValues(), oldPart);
-            }
+            msdb.alterPartition(dbname, name, part.getValues(), part);
           }
           msdb.alterTable(dbname, name, newt);
         } else {
-          LOG.warn("Alter table not cascaded to partitions.");
-          alterTableUpdateTableColumnStats(msdb, oldt, newt);
+          LOG.warn("Alter table does not cascade changes to its partitions.");
         }
       } else {
         alterTableUpdateTableColumnStats(msdb, oldt, newt);
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 2e80c9d3b1..0818704e01 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -3500,7 +3500,7 @@ private String makeParameterDeclarationStringObj(Map<String, Object> params) {
     LOG.debug("filter specified is {}, JDOQL filter is {}", filter, queryFilterString);
     if (LOG.isDebugEnabled()) {
       for (Entry<String, Object> entry : params.entrySet()) {
-        LOG.debug("key: {} value: {} class: {}", entry.getKey(), entry.getValue(), 
+        LOG.debug("key: {} value: {} class: {}", entry.getKey(), entry.getValue(),
             entry.getValue().getClass().getName());
       }
     }
@@ -7665,7 +7665,7 @@ public boolean updatePartitionColumnStatistics(ColumnStatistics colStats, List<String> partVals)
   private List<MTableColumnStatistics> getMTableColumnStatistics(Table table,
       List<String> colNames, QueryWrapper queryWrapper) throws MetaException {
     if (colNames == null || colNames.isEmpty()) {
-      return Collections.emptyList();
+      return null;
     }
 
     boolean committed = false;
@@ -7750,9 +7750,7 @@ protected ColumnStatistics getJdoResult(
 
       try {
         List<MTableColumnStatistics> mStats = getMTableColumnStatistics(getTable(), colNames, queryWrapper);
-        if (mStats.isEmpty()) {
-          return null;
-        }
+        if (mStats.isEmpty()) return null;
         // LastAnalyzed is stored per column, but thrift object has it per multiple columns.
         // Luckily, nobody actually uses it, so we will set to lowest value of all columns for now.
         ColumnStatisticsDesc desc = StatObjectConverter.getTableColumnStatisticsDesc(mStats.get(0));
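For context on the ObjectStore hunks above: the revert makes getMTableColumnStatistics signal "nothing requested" with null again instead of Collections.emptyList(), while getJdoResult keeps treating an empty result as "no statistics". The sketch below illustrates that null-versus-empty-list contract with hypothetical stand-in types and names; it is not the metastore code itself.

```java
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

// Hypothetical stand-ins for the metastore lookup; only the null-vs-empty-list
// contract discussed above is illustrated here.
public class ColumnStatsContractSketch {

  // Restored convention: null when no columns were requested at all,
  // a (possibly empty) list when a real lookup was performed.
  static List<String> getStatsForColumns(List<String> colNames) {
    if (colNames == null || colNames.isEmpty()) {
      return null; // the reverted change returned Collections.emptyList() here instead
    }
    List<String> stats = new ArrayList<>();
    // ... populate from the backing store; may legitimately remain empty ...
    return stats;
  }

  // A defensive caller treats both conventions the same way; the restored
  // "if (mStats.isEmpty()) return null;" guard covers the empty case before
  // the first element is dereferenced.
  static String describeFirstStat(List<String> colNames) {
    List<String> stats = getStatsForColumns(colNames);
    if (stats == null || stats.isEmpty()) {
      return null;
    }
    return stats.get(0);
  }

  public static void main(String[] args) {
    System.out.println(describeFirstStat(null));                          // null: nothing requested
    System.out.println(describeFirstStat(Collections.emptyList()));       // null: empty request
    System.out.println(describeFirstStat(Collections.singletonList("cint"))); // null: sketch store is empty
  }
}
```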