diff --git accumulo-handler/src/test/results/positive/accumulo_queries.q.out accumulo-handler/src/test/results/positive/accumulo_queries.q.out index d7cceec..de82857 100644 --- accumulo-handler/src/test/results/positive/accumulo_queries.q.out +++ accumulo-handler/src/test/results/positive/accumulo_queries.q.out @@ -40,8 +40,9 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE accumulo_table_1 SELECT POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 - Stage-2 is a root stage + Stage-2 + Stage-1 is a root stage + Stage-3 is a root stage STAGE PLANS: Stage: Stage-0 @@ -52,11 +53,15 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE - Stage: Stage-1 + Stage: Stage-2 Insert operator: Insert - Stage: Stage-2 + Stage: Stage-1 + Pre Insert operator: + Pre-Insert task + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -495,9 +500,10 @@ ON (x.key = Y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 + Stage-2 + Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-0 @@ -508,11 +514,15 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE - Stage: Stage-1 + Stage: Stage-2 Insert operator: Insert - Stage: Stage-3 + Stage: Stage-1 + Pre Insert operator: + Pre-Insert task + + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -547,7 +557,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan diff --git accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out index 7330746..6621a4e 100644 --- accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out +++ accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out @@ -34,15 +34,16 @@ select value,"" where a.key > 50 AND a.key < 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 - Stage-3 is a root stage - Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 - Stage-6 - Stage-2 depends on stages: Stage-6, Stage-5, Stage-8 - Stage-4 depends on stages: Stage-2 - Stage-5 + Stage-2 + Stage-1 is a root stage + Stage-4 is a root stage + Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 Stage-7 - Stage-8 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-7, Stage-6, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 + Stage-8 + Stage-9 depends on stages: Stage-8 STAGE PLANS: Stage: Stage-0 @@ -53,11 +54,15 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE - Stage: Stage-1 + Stage: Stage-2 Insert operator: Insert - Stage: Stage-3 + Stage: Stage-1 + Pre Insert operator: + Pre-Insert task + + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -94,16 +99,16 @@ STAGE PLANS: serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe name: default.src_x2 - Stage: Stage-9 + Stage: Stage-10 Conditional Operator - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 Move Operator tables: replace: true @@ -113,10 +118,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 - Stage: Stage-4 + Stage: Stage-5 
Stats-Aggr Operator - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -128,7 +133,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -140,7 +145,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 - Stage: Stage-8 + Stage: Stage-9 Move Operator files: hdfs directory: true diff --git ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java index 133ef0a..e9fe8fa 100644 --- ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java +++ ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java @@ -789,15 +789,6 @@ {"FilterTimestampColumnBetween", ""}, {"FilterTimestampColumnBetween", "!"}, - // This is for runtime min/max pushdown - don't need to do NOT BETWEEN - {"FilterColumnBetweenDynamicValue", "long", ""}, - {"FilterColumnBetweenDynamicValue", "double", ""}, - {"FilterColumnBetweenDynamicValue", "decimal", ""}, - {"FilterColumnBetweenDynamicValue", "string", ""}, - {"FilterColumnBetweenDynamicValue", "char", ""}, - {"FilterColumnBetweenDynamicValue", "varchar", ""}, - {"FilterColumnBetweenDynamicValue", "timestamp", ""}, - {"ColumnCompareColumn", "Equal", "long", "double", "=="}, {"ColumnCompareColumn", "Equal", "double", "double", "=="}, {"ColumnCompareColumn", "NotEqual", "long", "double", "!="}, @@ -1173,8 +1164,6 @@ private void generate() throws Exception { } else if (tdesc[0].equals("FilterColumnBetween")) { generateFilterColumnBetween(tdesc); - } else if (tdesc[0].equals("FilterColumnBetweenDynamicValue")) { - generateFilterColumnBetweenDynamicValue(tdesc); } else if (tdesc[0].equals("ScalarArithmeticColumn") || tdesc[0].equals("ScalarDivideColumn")) { generateScalarArithmeticColumn(tdesc); } else if (tdesc[0].equals("FilterColumnCompareColumn")) { @@ -1390,72 +1379,6 @@ private void generateFilterColumnBetween(String[] tdesc) throws Exception { className, templateString); } - private void generateFilterColumnBetweenDynamicValue(String[] tdesc) throws Exception { - String operandType = tdesc[1]; - String optionalNot = tdesc[2]; - - String className = "Filter" + getCamelCaseType(operandType) + "Column" + - (optionalNot.equals("!") ? "Not" : "") + "BetweenDynamicValue"; - - String typeName = getCamelCaseType(operandType); - String defaultValue; - String vectorType; - String getPrimitiveMethod; - String getValueMethod; - - if (operandType.equals("long")) { - defaultValue = "0"; - vectorType = "long"; - getPrimitiveMethod = "getLong"; - getValueMethod = ""; - } else if (operandType.equals("double")) { - defaultValue = "0"; - vectorType = "double"; - getPrimitiveMethod = "getDouble"; - getValueMethod = ""; - } else if (operandType.equals("decimal")) { - defaultValue = "null"; - vectorType = "HiveDecimal"; - getPrimitiveMethod = "getHiveDecimal"; - getValueMethod = ""; - } else if (operandType.equals("string")) { - defaultValue = "null"; - vectorType = "byte[]"; - getPrimitiveMethod = "getString"; - getValueMethod = ".getBytes()"; - } else if (operandType.equals("char")) { - defaultValue = "null"; - vectorType = "byte[]"; - getPrimitiveMethod = "getHiveChar"; - getValueMethod = ".getStrippedValue().getBytes()"; // Does vectorization use stripped char values? 
- } else if (operandType.equals("varchar")) { - defaultValue = "null"; - vectorType = "byte[]"; - getPrimitiveMethod = "getHiveVarchar"; - getValueMethod = ".getValue().getBytes()"; - } else if (operandType.equals("timestamp")) { - defaultValue = "null"; - vectorType = "Timestamp"; - getPrimitiveMethod = "getTimestamp"; - getValueMethod = ""; - } else { - throw new IllegalArgumentException("Type " + operandType + " not supported"); - } - - // Read the template into a string, expand it, and write it. - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); - String templateString = readFile(templateFile); - templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", typeName); - templateString = templateString.replaceAll("", defaultValue); - templateString = templateString.replaceAll("", vectorType); - templateString = templateString.replaceAll("", getPrimitiveMethod); - templateString = templateString.replaceAll("", getValueMethod); - - writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, - className, templateString); - } - private void generateColumnCompareColumn(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2]; @@ -3161,12 +3084,6 @@ static String getCamelCaseType(String type) { return "Timestamp"; } else if (type.equals("date")) { return "Date"; - } else if (type.equals("string")) { - return "String"; - } else if (type.equals("char")) { - return "Char"; - } else if (type.equals("varchar")) { - return "VarChar"; } else { return type; } diff --git beeline/pom.xml beeline/pom.xml index 58ca92e..5503add 100644 --- beeline/pom.xml +++ beeline/pom.xml @@ -29,7 +29,6 @@ .. - 1.6.6 @@ -120,15 +119,8 @@ test - org.powermock - powermock-module-junit4 - ${powermock.version} - test - - - org.powermock - powermock-api-mockito - ${powermock.version} + org.mockito + mockito-all test diff --git beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java index ea58776..a981bce 100644 --- beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java +++ beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java @@ -815,10 +815,38 @@ private void runBeeLine(String scriptDir, String scriptFile) // Generate the beeline args per hive conf and execute the given script public void runBeeLine(String sqlScriptFile) throws IOException { - CommandBuilder builder = new CommandBuilder(hiveConf, userName, passWord, sqlScriptFile); + List argList = new ArrayList(); + argList.add("-u"); + argList.add(HiveSchemaHelper.getValidConfVar( + ConfVars.METASTORECONNECTURLKEY, hiveConf)); + argList.add("-d"); + argList.add(HiveSchemaHelper.getValidConfVar( + ConfVars.METASTORE_CONNECTION_DRIVER, hiveConf)); + argList.add("-n"); + argList.add(userName); + argList.add("-p"); + argList.add(passWord); + argList.add("-f"); + argList.add(sqlScriptFile); + + if (LOG.isDebugEnabled()) { + LOG.debug("Going to invoke file that contains:"); + BufferedReader reader = new BufferedReader(new FileReader(sqlScriptFile)); + try { + String line; + while ((line = reader.readLine()) != null) { + LOG.debug("script: " + line); + } + } finally { + if (reader != null) { + reader.close(); + } + } + } // run the script using Beeline - try (BeeLine beeLine = new BeeLine()) { + BeeLine beeLine = new BeeLine(); + try { if (!verbose) { beeLine.setOutputStream(new PrintStream(new NullOutputStream())); 
beeLine.getOpts().setSilent(true); @@ -828,53 +856,13 @@ public void runBeeLine(String sqlScriptFile) throws IOException { // We can be pretty sure that an entire line can be processed as a single command since // we always add a line separator at the end while calling dbCommandParser.buildCommand. beeLine.getOpts().setEntireLineAsCommand(true); - LOG.debug("Going to run command <" + builder.buildToLog() + ">"); - int status = beeLine.begin(builder.buildToRun(), null); + LOG.debug("Going to run command <" + StringUtils.join(argList, " ") + ">"); + int status = beeLine.begin(argList.toArray(new String[0]), null); if (status != 0) { throw new IOException("Schema script failed, errorcode " + status); } - } - } - - static class CommandBuilder { - private final HiveConf hiveConf; - private final String userName; - private final String password; - private final String sqlScriptFile; - - CommandBuilder(HiveConf hiveConf, String userName, String password, String sqlScriptFile) { - this.hiveConf = hiveConf; - this.userName = userName; - this.password = password; - this.sqlScriptFile = sqlScriptFile; - } - - String[] buildToRun() throws IOException { - return argsWith(password); - } - - String buildToLog() throws IOException { - logScript(); - return StringUtils.join(argsWith(BeeLine.PASSWD_MASK), " "); - } - - private String[] argsWith(String password) throws IOException { - return new String[] { "-u", - HiveSchemaHelper.getValidConfVar(ConfVars.METASTORECONNECTURLKEY, hiveConf), "-d", - HiveSchemaHelper.getValidConfVar(ConfVars.METASTORE_CONNECTION_DRIVER, hiveConf), "-n", - userName, "-p", password, "-f", sqlScriptFile }; - } - - private void logScript() throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("Going to invoke file that contains:"); - try (BufferedReader reader = new BufferedReader(new FileReader(sqlScriptFile))) { - String line; - while ((line = reader.readLine()) != null) { - LOG.debug("script: " + line); - } - } - } + } finally { + beeLine.close(); } } diff --git common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java index c78f005..25c7508 100644 --- common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -232,30 +232,6 @@ public static void clearColumnStatsState(Map params) { } } - public static void removeColumnStatsState(Map params, List colNames) { - String statsAcc; - if (params != null && (statsAcc = params.get(COLUMN_STATS_ACCURATE)) != null) { - // statsAcc may not be jason format, which will throw exception - JSONObject stats = parseStatsAcc(statsAcc); - try { - JSONObject colStats = stats.getJSONObject(COLUMN_STATS); - for (String colName : colNames) { - if (colStats.has(colName)) { - colStats.remove(colName); - } - } - if (colStats.length() != 0) { - stats.put(COLUMN_STATS, colStats); - } else { - stats.remove(COLUMN_STATS); - } - params.put(COLUMN_STATS_ACCURATE, stats.toString()); - } catch (JSONException e) { - LOG.debug(e.getMessage()); - } - } - } - public static void setBasicStatsStateForCreateTable(Map params, String setting) { if (TRUE.equals(setting)) { for (String stat : StatsSetupConst.supportedStats) { diff --git common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java index 5883b01..abad45e 100644 --- common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java 
+++ common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java @@ -1,20 +1,3 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ package org.apache.hadoop.hive.common.classification; diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 29de605..291ca7d 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1079,8 +1079,6 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal + " expressed as multiple of Local FS write cost"), HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;" + " expressed as multiple of Local FS read cost"), - HIVE_CBO_SHOW_WARNINGS("hive.cbo.show.warnings", true, - "Toggle display of CBO warnings like missing column stats"), AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"), SEMIJOIN_CONVERSION("hive.enable.semijoin.conversion", true, "convert group by followed by inner equi join into semijoin"), HIVE_COLUMN_ALIGNMENT("hive.order.columnalignment", true, "Flag to control whether we want to try to align" + @@ -1948,10 +1946,6 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal new TimeValidator(TimeUnit.MILLISECONDS), "Time interval describing how often the reaper runs"), WRITE_SET_REAPER_INTERVAL("hive.writeset.reaper.interval", "60s", new TimeValidator(TimeUnit.MILLISECONDS), "Frequency of WriteSet reaper runs"), - - MERGE_CARDINALITY_VIOLATION_CHECK("hive.merge.cardinality.check", true, - "Set to true to ensure that each SQL Merge statement ensures that for each row in the target\n" + - "table there is at most 1 matching row in the source table per SQL Specification."), // For Druid storage handler HIVE_DRUID_INDEXING_GRANULARITY("hive.druid.indexer.segments.granularity", "DAY", @@ -2366,14 +2360,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVE_SERVER2_TEZ_SESSION_MAX_INIT_THREADS("hive.server2.tez.sessions.init.threads", 16, "If hive.server2.tez.initialize.default.sessions is enabled, the maximum number of\n" + "threads to use to initialize the default sessions."), - HIVE_SERVER2_TEZ_SESSION_RESTRICTED_CONFIGS("hive.server2.tez.sessions.restricted.configs", "", - "The configuration settings that cannot be set when submitting jobs to HiveServer2. 
If\n" + - "any of these are set to values different from those in the server configuration, an\n" + - "exception will be thrown."), - HIVE_SERVER2_TEZ_SESSION_CUSTOM_QUEUE_ALLOWED("hive.server2.tez.sessions.custom.queue.allowed", - "true", new StringSet("true", "false", "ignore"), - "Whether Tez session pool should allow submitting queries to custom queues. The options\n" + - "are true, false (error out), ignore (accept the query but ignore the queue setting)."), + // Operation log configuration HIVE_SERVER2_LOGGING_OPERATION_ENABLED("hive.server2.logging.operation.enabled", true, @@ -3094,10 +3081,6 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal LLAP_DAEMON_NUM_EXECUTORS("hive.llap.daemon.num.executors", 4, "Number of executors to use in LLAP daemon; essentially, the number of tasks that can be\n" + "executed in parallel.", "llap.daemon.num.executors"), - LLAP_DAEMON_AM_REPORTER_MAX_THREADS("hive.llap.daemon.am-reporter.max.threads", 4, - "Maximum number of threads to be used for AM reporter. If this is lower than number of\n" + - "executors in llap daemon, it would be set to number of executors at runtime.", - "llap.daemon.am-reporter.max.threads"), LLAP_DAEMON_RPC_PORT("hive.llap.daemon.rpc.port", 0, "The LLAP daemon RPC port.", "llap.daemon.rpc.port. A value of 0 indicates a dynamic port"), LLAP_DAEMON_MEMORY_PER_INSTANCE_MB("hive.llap.daemon.memory.per.instance.mb", 4096, @@ -3218,7 +3201,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Whether to create the LLAP coordinator; since execution engine and container vs llap\n" + "settings are both coming from job configs, we don't know at start whether this should\n" + "be created. Default true."), - LLAP_DAEMON_LOGGER("hive.llap.daemon.logger", Constants.LLAP_LOGGER_NAME_QUERY_ROUTING, + LLAP_DAEMON_LOGGER("hive.llap.daemon.logger", Constants.LLAP_LOGGER_NAME_RFA, new StringSet(Constants.LLAP_LOGGER_NAME_QUERY_ROUTING, Constants.LLAP_LOGGER_NAME_RFA, Constants.LLAP_LOGGER_NAME_CONSOLE), @@ -3292,18 +3275,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVE_CONF_RESTRICTED_LIST("hive.conf.restricted.list", "hive.security.authenticator.manager,hive.security.authorization.manager," + "hive.security.metastore.authorization.manager,hive.security.metastore.authenticator.manager," + - "hive.users.in.admin.role,hive.server2.xsrf.filter.enabled,hive.security.authorization.enabled," + - "hive.server2.authentication.ldap.baseDN," + - "hive.server2.authentication.ldap.url," + - "hive.server2.authentication.ldap.Domain," + - "hive.server2.authentication.ldap.groupDNPattern," + - "hive.server2.authentication.ldap.groupFilter," + - "hive.server2.authentication.ldap.userDNPattern," + - "hive.server2.authentication.ldap.userFilter," + - "hive.server2.authentication.ldap.groupMembershipKey," + - "hive.server2.authentication.ldap.userMembershipKey," + - "hive.server2.authentication.ldap.groupClassKey," + - "hive.server2.authentication.ldap.customLDAPQuery", + "hive.users.in.admin.role,hive.server2.xsrf.filter.enabled,hive.security.authorization.enabled", "Comma separated list of configuration options which are immutable at runtime"), HIVE_CONF_HIDDEN_LIST("hive.conf.hidden.list", METASTOREPWD.varname + "," + HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname @@ -3340,7 +3312,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "See HIVE-15121 for details."); public final String varname; - public final String altName; + private final String altName; 
private final String defaultExpr; public final String defaultStrVal; @@ -3864,11 +3836,6 @@ public static String getVar(Configuration conf, ConfVars var) { : conf.get(var.varname, var.defaultStrVal); } - public static String getVarWithoutType(Configuration conf, ConfVars var) { - return var.altName != null ? conf.get(var.varname, conf.get(var.altName, var.defaultExpr)) - : conf.get(var.varname, var.defaultExpr); - } - public static String getTrimmedVar(Configuration conf, ConfVars var) { assert (var.valClass == String.class) : var.varname; if (var.altName != null) { @@ -4544,26 +4511,4 @@ public static String generateMrDeprecationWarning() { + "Consider using a different execution engine (i.e. " + HiveConf.getNonMrEngines() + ") or using Hive 1.X releases."; } - - private static final Object reverseMapLock = new Object(); - private static HashMap reverseMap = null; - - public static HashMap getOrCreateReverseMap() { - // This should be called rarely enough; for now it's ok to just lock every time. - synchronized (reverseMapLock) { - if (reverseMap != null) return reverseMap; - } - HashMap vars = new HashMap<>(); - for (ConfVars val : ConfVars.values()) { - vars.put(val.varname.toLowerCase(), val); - if (val.altName != null && !val.altName.isEmpty()) { - vars.put(val.altName.toLowerCase(), val); - } - } - synchronized (reverseMapLock) { - if (reverseMap != null) return reverseMap; - reverseMap = vars; - return reverseMap; - } - } } diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java index 904ac80..cff0056 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java @@ -42,6 +42,7 @@ import io.druid.segment.loading.SegmentLoadingException; import io.druid.timeline.DataSegment; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.Constants; @@ -49,16 +50,21 @@ import org.apache.hadoop.hive.druid.io.DruidOutputFormat; import org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat; import org.apache.hadoop.hive.druid.serde.DruidSerDe; -import org.apache.hadoop.hive.metastore.HiveMetaHookV2; +import org.apache.hadoop.hive.metastore.DefaultHiveMetaHook; import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider; +import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputFormat; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; @@ -79,7 +85,7 @@ * DruidStorageHandler provides a HiveStorageHandler implementation for Druid. 
*/ @SuppressWarnings({ "deprecation", "rawtypes" }) -public class DruidStorageHandler extends DefaultStorageHandler implements HiveMetaHookV2 { +public class DruidStorageHandler extends DefaultHiveMetaHook implements HiveStorageHandler { protected static final Logger LOG = LoggerFactory.getLogger(DruidStorageHandler.class); @@ -99,6 +105,8 @@ private String rootWorkingDir = null; + private Configuration conf; + public DruidStorageHandler() { //this is the default value in druid final String base = HiveConf @@ -178,6 +186,17 @@ public HiveMetaHook getMetaHook() { } @Override + public HiveAuthorizationProvider getAuthorizationProvider() throws HiveException { + return new DefaultHiveAuthorizationProvider(); + } + + @Override + public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties + ) { + + } + + @Override public void preCreateTable(Table table) throws MetaException { // Do safety checks if (MetaStoreUtils.isExternalTable(table) && !StringUtils @@ -476,7 +495,9 @@ public void commitInsertTable(Table table, boolean overwrite) throws MetaExcepti @Override public void preInsertTable(Table table, boolean overwrite) throws MetaException { - //do nothing + if (!overwrite) { + throw new MetaException("INSERT INTO statement is not allowed by druid storage handler"); + } } @Override @@ -492,6 +513,27 @@ public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties + ) { + + } + + @Override + public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { + + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override public String toString() { return Constants.DRUID_HIVE_STORAGE_HANDLER_ID; } diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java index 12bbd73..86ddca8 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java @@ -140,7 +140,6 @@ break; case serdeConstants.FLOAT_TYPE_NAME: case serdeConstants.DOUBLE_TYPE_NAME: - case serdeConstants.DECIMAL_TYPE_NAME: af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i)); break; default: diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java index 3323cc0..1601a9a 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java @@ -1,20 +1,3 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ package org.apache.hadoop.hive.druid.io; import com.google.common.base.Function; diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java index ea84326..9548d96 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java @@ -46,7 +46,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; @@ -419,10 +418,6 @@ public Writable serialize(Object o, ObjectInspector objectInspector) throws SerD res = ((DoubleObjectInspector) fields.get(i).getFieldObjectInspector()) .get(values.get(i)); break; - case DECIMAL: - res = ((HiveDecimalObjectInspector) fields.get(i).getFieldObjectInspector()) - .getPrimitiveJavaObject(values.get(i)).doubleValue(); - break; case STRING: res = ((StringObjectInspector) fields.get(i).getFieldObjectInspector()) .getPrimitiveJavaObject( diff --git druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe2.java druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe2.java deleted file mode 100644 index 06b2072..0000000 --- druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe2.java +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.druid; - -import java.util.List; - -import org.apache.hadoop.hive.druid.serde.DruidSerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.util.StringUtils; - -import com.fasterxml.jackson.core.type.TypeReference; - -import io.druid.query.metadata.metadata.SegmentAnalysis; -import io.druid.query.metadata.metadata.SegmentMetadataQuery; - -/** - * Druid SerDe to be used in tests. 
- */ -public class QTestDruidSerDe2 extends DruidSerDe { - - // Request : - // "{\"queryType\":\"segmentMetadata\",\"dataSource\":{\"type\":\"table\",\"name\":\"wikipedia\"}," - // + "\"intervals\":{\"type\":\"intervals\"," - // + "\"intervals\":[\"-146136543-09-08T00:30:34.096-07:52:58/146140482-04-24T08:36:27.903-07:00\"]}," - // + "\"toInclude\":{\"type\":\"all\"},\"merge\":true,\"context\":null,\"analysisTypes\":[]," - // + "\"usingDefaultInterval\":true,\"lenientAggregatorMerge\":false,\"descending\":false}"; - private static final String RESPONSE = - "[ {\r\n " - + " \"id\" : \"merged\",\r\n " - + " \"intervals\" : [ \"2010-01-01T00:00:00.000Z/2015-12-31T00:00:00.000Z\" ],\r\n " - + " \"columns\" : {\r\n " - + " \"__time\" : { \"type\" : \"LONG\", \"hasMultipleValues\" : false, \"size\" : 407240380, \"cardinality\" : null, \"errorMessage\" : null },\r\n " - + " \"robot\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false, \"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n " - + " \"namespace\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : true, \"size\" : 100000, \"cardinality\" : 1504, \"errorMessage\" : null },\r\n " - + " \"anonymous\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false, \"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n " - // Next column has a similar name as previous, but different casing. - // This is allowed in Druid, but it should fail in Hive. - + " \"Anonymous\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false, \"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n " - + " \"page\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false, \"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n " - + " \"language\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false, \"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n " - + " \"newpage\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false, \"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n " - + " \"user\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false, \"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n " - + " \"count\" : { \"type\" : \"FLOAT\", \"hasMultipleValues\" : false, \"size\" : 100000, \"cardinality\" : null, \"errorMessage\" : null },\r\n " - + " \"added\" : { \"type\" : \"FLOAT\", \"hasMultipleValues\" : false, \"size\" : 100000, \"cardinality\" : null, \"errorMessage\" : null },\r\n " - + " \"delta\" : { \"type\" : \"FLOAT\", \"hasMultipleValues\" : false, \"size\" : 100000, \"cardinality\" : null, \"errorMessage\" : null },\r\n " - + " \"variation\" : { \"type\" : \"FLOAT\", \"hasMultipleValues\" : false, \"size\" : 100000, \"cardinality\" : null, \"errorMessage\" : null },\r\n " - + " \"deleted\" : { \"type\" : \"FLOAT\", \"hasMultipleValues\" : false, \"size\" : 100000, \"cardinality\" : null, \"errorMessage\" : null }\r\n " - + " },\r\n " - + " \"aggregators\" : {\r\n " - + " \"count\" : { \"type\" : \"longSum\", \"name\" : \"count\", \"fieldName\" : \"count\" },\r\n " - + " \"added\" : { \"type\" : \"doubleSum\", \"name\" : \"added\", \"fieldName\" : \"added\" },\r\n " - + " \"delta\" : { \"type\" : \"doubleSum\", \"name\" : \"delta\", \"fieldName\" : \"delta\" },\r\n " - + " \"variation\" : { \"type\" : \"doubleSum\", \"name\" : \"variation\", \"fieldName\" : \"variation\" },\r\n " - + " \"deleted\" : { \"type\" : \"doubleSum\", \"name\" : \"deleted\", \"fieldName\" : \"deleted\" }\r\n " - + " 
},\r\n " - + " \"queryGranularity\" : {\r\n \"type\": \"none\"\r\n },\r\n " - + " \"size\" : 300000,\r\n " - + " \"numRows\" : 5000000\r\n} ]"; - - /* Submits the request and returns */ - @Override - protected SegmentAnalysis submitMetadataRequest(String address, SegmentMetadataQuery query) - throws SerDeException { - // Retrieve results - List resultsList; - try { - resultsList = DruidStorageHandlerUtils.JSON_MAPPER.readValue(RESPONSE, - new TypeReference>() { - } - ); - } catch (Exception e) { - throw new SerDeException(StringUtils.stringifyException(e)); - } - return resultsList.get(0); - } - -} diff --git druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidStorageHandler2.java druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidStorageHandler2.java deleted file mode 100644 index 6ac4df9..0000000 --- druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidStorageHandler2.java +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.druid; - -import org.apache.hadoop.hive.serde2.AbstractSerDe; - -/** - * Storage handler for Druid to be used in tests. It cannot connect to - * Druid, and thus it cannot execute queries. 
- */ -@SuppressWarnings("deprecation") -public class QTestDruidStorageHandler2 extends DruidStorageHandler { - - @Override - public Class getSerDeClass() { - return QTestDruidSerDe2.class; - } - -} diff --git druid-handler/src/test/org/apache/hadoop/hive/druid/TestDruidSerDe.java druid-handler/src/test/org/apache/hadoop/hive/druid/TestDruidSerDe.java index 8dfa4d7..e6e3707 100644 --- druid-handler/src/test/org/apache/hadoop/hive/druid/TestDruidSerDe.java +++ druid-handler/src/test/org/apache/hadoop/hive/druid/TestDruidSerDe.java @@ -31,7 +31,6 @@ import java.util.Properties; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader; import org.apache.hadoop.hive.druid.serde.DruidQueryRecordReader; @@ -46,7 +45,6 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -641,14 +639,13 @@ private static void deserializeQueryResults(DruidSerDe serDe, String queryType, } - private static final String COLUMN_NAMES = "__time,c0,c1,c2,c3,c4,c5,c6,c7"; - private static final String COLUMN_TYPES = "timestamp,string,double,float,decimal(38,18),bigint,int,smallint,tinyint"; + private static final String COLUMN_NAMES = "__time,c0,c1,c2,c3,c4,c5,c6"; + private static final String COLUMN_TYPES = "timestamp,string,double,float,bigint,int,smallint,tinyint"; private static final Object[] ROW_OBJECT = new Object[] { new TimestampWritable(new Timestamp(1377907200000L)), new Text("dim1_val"), - new DoubleWritable(10669.3D), - new FloatWritable(10669.45F), - new HiveDecimalWritable(HiveDecimal.create(1064.34D)), + new DoubleWritable(10669D), + new FloatWritable(10669F), new LongWritable(1113939), new IntWritable(1112123), new ShortWritable((short) 12), @@ -659,13 +656,12 @@ private static void deserializeQueryResults(DruidSerDe serDe, String queryType, ImmutableMap.builder() .put("__time", 1377907200000L) .put("c0", "dim1_val") - .put("c1", 10669.3D) - .put("c2", 10669.45F) - .put("c3", 1064.34D) - .put("c4", 1113939L) - .put("c5", 1112123) - .put("c6", (short) 12) - .put("c7", (byte) 0) + .put("c1", 10669D) + .put("c2", 10669F) + .put("c3", 1113939L) + .put("c4", 1112123) + .put("c5", (short) 12) + .put("c6", (byte) 0) .put("__time_granularity", 1377907200000L) .build()); diff --git hbase-handler/src/test/results/positive/hbase_queries.q.out hbase-handler/src/test/results/positive/hbase_queries.q.out index 1eeaf80..276b6b8 100644 --- hbase-handler/src/test/results/positive/hbase_queries.q.out +++ hbase-handler/src/test/results/positive/hbase_queries.q.out @@ -40,8 +40,9 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 - Stage-2 is a root stage + Stage-2 + Stage-1 is a root stage + Stage-3 is a root stage STAGE PLANS: Stage: Stage-0 @@ -52,11 +53,15 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE - Stage: Stage-1 + Stage: Stage-2 Insert operator: Insert - Stage: Stage-2 + Stage: Stage-1 + Pre Insert operator: + Pre-Insert task + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -498,9 
+503,10 @@ ON (x.key = Y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 + Stage-2 + Stage-1 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-0 @@ -511,11 +517,15 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE - Stage: Stage-1 + Stage: Stage-2 Insert operator: Insert - Stage: Stage-3 + Stage: Stage-1 + Pre Insert operator: + Pre-Insert task + + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -550,7 +560,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan diff --git hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out index 079fb0e..68a417d 100644 --- hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out +++ hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out @@ -34,15 +34,16 @@ select value,"" where a.key > 50 AND a.key < 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 - Stage-3 is a root stage - Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 - Stage-6 - Stage-2 depends on stages: Stage-6, Stage-5, Stage-8 - Stage-4 depends on stages: Stage-2 - Stage-5 + Stage-2 + Stage-1 is a root stage + Stage-4 is a root stage + Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 Stage-7 - Stage-8 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-7, Stage-6, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 + Stage-8 + Stage-9 depends on stages: Stage-8 STAGE PLANS: Stage: Stage-0 @@ -53,11 +54,15 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE - Stage: Stage-1 + Stage: Stage-2 Insert operator: Insert - Stage: Stage-3 + Stage: Stage-1 + Pre Insert operator: + Pre-Insert task + + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -94,16 +99,16 @@ STAGE PLANS: serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.src_x2 - Stage: Stage-9 + Stage: Stage-10 Conditional Operator - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-2 + Stage: Stage-3 Move Operator tables: replace: true @@ -113,10 +118,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -128,7 +133,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -140,7 +145,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 - Stage: Stage-8 + Stage: Stage-9 Move Operator files: hdfs directory: true diff --git hbase-handler/src/test/results/positive/hbasestats.q.out hbase-handler/src/test/results/positive/hbasestats.q.out index 4e47bf5..bf902e4 100644 --- hbase-handler/src/test/results/positive/hbasestats.q.out +++ hbase-handler/src/test/results/positive/hbasestats.q.out @@ -63,8 +63,9 @@ POSTHOOK: query: explain INSERT OVERWRITE TABLE users SELECT 'user1', 'IA', 'USA POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - 
Stage-1 - Stage-2 is a root stage + Stage-2 + Stage-1 is a root stage + Stage-3 is a root stage STAGE PLANS: Stage: Stage-0 @@ -75,11 +76,15 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE - Stage: Stage-1 + Stage: Stage-2 Insert operator: Insert - Stage: Stage-2 + Stage: Stage-1 + Pre Insert operator: + Pre-Insert task + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java index 4df2758..7524c49 100644 --- hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java @@ -23,6 +23,7 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileChecksum; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -31,7 +32,6 @@ import org.apache.hadoop.hive.metastore.MetaStoreEventListener; import org.apache.hadoop.hive.metastore.RawStore; import org.apache.hadoop.hive.metastore.RawStoreProxy; -import org.apache.hadoop.hive.metastore.ReplChangeManager; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.Function; @@ -59,6 +59,7 @@ import org.apache.hadoop.hive.metastore.messaging.EventMessage.EventType; import org.apache.hadoop.hive.metastore.messaging.MessageFactory; import org.apache.hadoop.hive.metastore.messaging.PartitionFiles; +import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -222,8 +223,7 @@ public String next() { try { FileStatus file = files[i]; i++; - return ReplChangeManager.encodeFileUri(file.getPath().toString(), - ReplChangeManager.getChksumString(file.getPath(), fs)); + return buildFileWithChksum(file.getPath(), fs); } catch (IOException e) { throw new RuntimeException(e); } @@ -520,6 +520,16 @@ public void setTimeToLive(long configTtl) { } + String buildFileWithChksum(Path p, FileSystem fs) throws IOException { + FileChecksum cksum = fs.getFileChecksum(p); + String chksumString = null; + if (cksum != null) { + chksumString = + StringUtils.byteToHexString(cksum.getBytes(), 0, cksum.getLength()); + } + return encodeFileUri(p.toString(), chksumString); + } + // TODO: this needs to be enhanced once change management based filesystem is implemented // Currently using fileuri#checksum as the format private String encodeFileUri(String fileUriStr, String fileChecksum) { diff --git hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java index 81f6155..1a7cfae 100644 --- hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java +++ hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java @@ -562,11 +562,10 @@ private static IMetaStoreClient getMetaStoreClient(HiveEndPoint endPoint, HiveCo private final RecordWriter recordWriter; private final List txnIds; - //volatile because heartbeat() may be in a "different" thread; updates of this are "piggybacking" - private volatile int currentTxnIndex = -1; + private int currentTxnIndex = -1; private final String partNameForLock; - //volatile because heartbeat() may be 
in a "different" thread - private volatile TxnState state; + + private TxnState state; private LockRequest lockRequest = null; /** * once any operation on this batch encounters a system exception @@ -946,14 +945,7 @@ public void heartbeat() throws StreamingException, HeartBeatFailure { if(isClosed) { return; } - if(state != TxnState.OPEN && currentTxnIndex >= txnIds.size() - 1) { - //here means last txn in the batch is resolved but the close() hasn't been called yet so - //there is nothing to heartbeat - return; - } - //if here after commit()/abort() but before next beginNextTransaction(), currentTxnIndex still - //points at the last txn which we don't want to heartbeat - Long first = txnIds.get(state == TxnState.OPEN ? currentTxnIndex : currentTxnIndex + 1); + Long first = txnIds.get(currentTxnIndex); Long last = txnIds.get(txnIds.size()-1); try { HeartbeatTxnRangeResponse resp = heartbeaterMSClient.heartbeatTxnRange(first, last); diff --git hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java index bf29993..197ca7b 100644 --- hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java +++ hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java @@ -669,25 +669,6 @@ public void testHeartbeat() throws Exception { Assert.assertEquals("Acquired timestamp didn't match", acquiredAt, lock.getAcquiredat()); Assert.assertTrue("Expected new heartbeat (" + lock.getLastheartbeat() + ") == old heartbeat(" + heartbeatAt +")", lock.getLastheartbeat() == heartbeatAt); - txnBatch.close(); - int txnBatchSize = 200; - txnBatch = connection.fetchTransactionBatch(txnBatchSize, writer); - for(int i = 0; i < txnBatchSize; i++) { - txnBatch.beginNextTransaction(); - if(i % 47 == 0) { - txnBatch.heartbeat(); - } - if(i % 10 == 0) { - txnBatch.abort(); - } - else { - txnBatch.commit(); - } - if(i % 37 == 0) { - txnBatch.heartbeat(); - } - } - } @Test public void testTransactionBatchEmptyAbort() throws Exception { diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java index 1ac4d01..205c640 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java @@ -53,7 +53,6 @@ private static Warehouse warehouse; private static MiniDFSCluster m_dfs; private static String cmroot; - private static FileSystem fs; @BeforeClass public static void setUp() throws Exception { @@ -66,7 +65,6 @@ public static void setUp() throws Exception { hiveConf.set(HiveConf.ConfVars.REPLCMDIR.varname, cmroot); hiveConf.setInt(CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY, 60); warehouse = new Warehouse(hiveConf); - fs = new Path(cmroot).getFileSystem(hiveConf); try { client = new HiveMetaStoreClient(hiveConf); } catch (Throwable e) { @@ -153,15 +151,15 @@ public void testRecyclePartTable() throws Exception { Path part1Path = new Path(warehouse.getPartitionPath(db, tblName, ImmutableMap.of("dt", "20160101")), "part"); createFile(part1Path, "p1"); - String path1Chksum = ReplChangeManager.getChksumString(part1Path, fs); + String path1Chksum = ReplChangeManager.getCksumString(part1Path, hiveConf); Path part2Path = new Path(warehouse.getPartitionPath(db, tblName, ImmutableMap.of("dt", "20160102")), "part"); 
createFile(part2Path, "p2"); - String path2Chksum = ReplChangeManager.getChksumString(part2Path, fs); + String path2Chksum = ReplChangeManager.getCksumString(part2Path, hiveConf); Path part3Path = new Path(warehouse.getPartitionPath(db, tblName, ImmutableMap.of("dt", "20160103")), "part"); createFile(part3Path, "p3"); - String path3Chksum = ReplChangeManager.getChksumString(part3Path, fs); + String path3Chksum = ReplChangeManager.getCksumString(part3Path, hiveConf); Assert.assertTrue(part1Path.getFileSystem(hiveConf).exists(part1Path)); Assert.assertTrue(part2Path.getFileSystem(hiveConf).exists(part2Path)); @@ -223,15 +221,15 @@ public void testRecycleNonPartTable() throws Exception { Path filePath1 = new Path(warehouse.getTablePath(db, tblName), "part1"); createFile(filePath1, "f1"); - String fileChksum1 = ReplChangeManager.getChksumString(filePath1, fs); + String fileChksum1 = ReplChangeManager.getCksumString(filePath1, hiveConf); Path filePath2 = new Path(warehouse.getTablePath(db, tblName), "part2"); createFile(filePath2, "f2"); - String fileChksum2 = ReplChangeManager.getChksumString(filePath2, fs); + String fileChksum2 = ReplChangeManager.getCksumString(filePath2, hiveConf); Path filePath3 = new Path(warehouse.getTablePath(db, tblName), "part3"); createFile(filePath3, "f3"); - String fileChksum3 = ReplChangeManager.getChksumString(filePath3, fs); + String fileChksum3 = ReplChangeManager.getCksumString(filePath3, hiveConf); Assert.assertTrue(filePath1.getFileSystem(hiveConf).exists(filePath1)); Assert.assertTrue(filePath2.getFileSystem(hiveConf).exists(filePath2)); @@ -269,26 +267,26 @@ public void testClearer() throws Exception { fs.mkdirs(dirTbl1); Path part11 = new Path(dirTbl1, "part1"); createFile(part11, "testClearer11"); - String fileChksum11 = ReplChangeManager.getChksumString(part11, fs); + String fileChksum11 = ReplChangeManager.getCksumString(part11, hiveConf); Path part12 = new Path(dirTbl1, "part2"); createFile(part12, "testClearer12"); - String fileChksum12 = ReplChangeManager.getChksumString(part12, fs); + String fileChksum12 = ReplChangeManager.getCksumString(part12, hiveConf); Path dirTbl2 = new Path(dirDb, "tbl2"); fs.mkdirs(dirTbl2); Path part21 = new Path(dirTbl2, "part1"); createFile(part21, "testClearer21"); - String fileChksum21 = ReplChangeManager.getChksumString(part21, fs); + String fileChksum21 = ReplChangeManager.getCksumString(part21, hiveConf); Path part22 = new Path(dirTbl2, "part2"); createFile(part22, "testClearer22"); - String fileChksum22 = ReplChangeManager.getChksumString(part22, fs); + String fileChksum22 = ReplChangeManager.getCksumString(part22, hiveConf); Path dirTbl3 = new Path(dirDb, "tbl3"); fs.mkdirs(dirTbl3); Path part31 = new Path(dirTbl3, "part1"); createFile(part31, "testClearer31"); - String fileChksum31 = ReplChangeManager.getChksumString(part31, fs); + String fileChksum31 = ReplChangeManager.getCksumString(part31, hiveConf); Path part32 = new Path(dirTbl3, "part2"); createFile(part32, "testClearer32"); - String fileChksum32 = ReplChangeManager.getChksumString(part32, fs); + String fileChksum32 = ReplChangeManager.getCksumString(part32, hiveConf); ReplChangeManager.getInstance(hiveConf).recycle(dirTbl1, false); ReplChangeManager.getInstance(hiveConf).recycle(dirTbl2, false); diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestReplicationScenarios.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestReplicationScenarios.java index 7836c47..5be3e9c 100644 --- 
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestReplicationScenarios.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestReplicationScenarios.java @@ -90,11 +90,9 @@ public static void setUpBeforeClass() throws Exception { WindowsPathUtil.convertPathsFromWindowsToHdfs(hconf); } - hconf.setVar(HiveConf.ConfVars.METASTORE_EVENT_LISTENERS, + System.setProperty(HiveConf.ConfVars.METASTORE_EVENT_LISTENERS.varname, DBNOTIF_LISTENER_CLASSNAME); // turn on db notification listener on metastore - hconf.setBoolVar(HiveConf.ConfVars.REPLCMENABLED, true); - hconf.setVar(HiveConf.ConfVars.REPLCMDIR, TEST_PATH + "/cmroot/"); - msPort = MetaStoreUtils.startMetaStore(hconf); + msPort = MetaStoreUtils.startMetaStore(); hconf.setVar(HiveConf.ConfVars.REPLDIR,TEST_PATH + "/hrepl/"); hconf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort); @@ -195,87 +193,6 @@ public void testBasic() throws IOException { } @Test - public void testBasicWithCM() throws Exception { - - String testName = "basic_with_cm"; - LOG.info("Testing "+testName); - String dbName = testName + "_" + tid; - - run("CREATE DATABASE " + dbName); - run("CREATE TABLE " + dbName + ".unptned(a string) STORED AS TEXTFILE"); - run("CREATE TABLE " + dbName + ".ptned(a string) partitioned by (b int) STORED AS TEXTFILE"); - run("CREATE TABLE " + dbName + ".unptned_empty(a string) STORED AS TEXTFILE"); - run("CREATE TABLE " + dbName + ".ptned_empty(a string) partitioned by (b int) STORED AS TEXTFILE"); - - String[] unptn_data = new String[]{ "eleven" , "twelve" }; - String[] ptn_data_1 = new String[]{ "thirteen", "fourteen", "fifteen"}; - String[] ptn_data_2 = new String[]{ "fifteen", "sixteen", "seventeen"}; - String[] ptn_data_2_later = new String[]{ "eighteen", "nineteen", "twenty"}; - String[] empty = new String[]{}; - - String unptn_locn = new Path(TEST_PATH , testName + "_unptn").toUri().getPath(); - String ptn_locn_1 = new Path(TEST_PATH , testName + "_ptn1").toUri().getPath(); - String ptn_locn_2 = new Path(TEST_PATH , testName + "_ptn2").toUri().getPath(); - String ptn_locn_2_later = new Path(TEST_PATH , testName + "_ptn2_later").toUri().getPath(); - - createTestDataFile(unptn_locn, unptn_data); - createTestDataFile(ptn_locn_1, ptn_data_1); - createTestDataFile(ptn_locn_2, ptn_data_2); - createTestDataFile(ptn_locn_2_later, ptn_data_2_later); - - run("LOAD DATA LOCAL INPATH '" + unptn_locn + "' OVERWRITE INTO TABLE " + dbName + ".unptned"); - run("SELECT * from " + dbName + ".unptned"); - verifyResults(unptn_data); - run("LOAD DATA LOCAL INPATH '" + ptn_locn_1 + "' OVERWRITE INTO TABLE " + dbName + ".ptned PARTITION(b=1)"); - run("SELECT a from " + dbName + ".ptned WHERE b=1"); - verifyResults(ptn_data_1); - run("LOAD DATA LOCAL INPATH '" + ptn_locn_2 + "' OVERWRITE INTO TABLE " + dbName + ".ptned PARTITION(b=2)"); - run("SELECT a from " + dbName + ".ptned WHERE b=2"); - verifyResults(ptn_data_2); - run("SELECT a from " + dbName + ".ptned_empty"); - verifyResults(empty); - run("SELECT * from " + dbName + ".unptned_empty"); - verifyResults(empty); - - advanceDumpDir(); - run("REPL DUMP " + dbName); - String replDumpLocn = getResult(0,0); - String replDumpId = getResult(0,1,true); - - // Table dropped after "repl dump" - run("DROP TABLE " + dbName + ".unptned"); - // Partition droppped after "repl dump" - run("ALTER TABLE " + dbName + ".ptned " + "DROP PARTITION(b=1)"); - // File changed after "repl dump" - Partition p = metaStoreClient.getPartition(dbName, "ptned", "b=2"); - Path loc = new 
Path(p.getSd().getLocation()); - FileSystem fs = loc.getFileSystem(hconf); - Path file = fs.listStatus(loc)[0].getPath(); - fs.delete(file, false); - fs.copyFromLocalFile(new Path(ptn_locn_2_later), file); - - run("EXPLAIN REPL LOAD " + dbName + "_dupe FROM '" + replDumpLocn + "'"); - printOutput(); - run("REPL LOAD " + dbName + "_dupe FROM '" + replDumpLocn + "'"); - - run("REPL STATUS " + dbName + "_dupe"); - verifyResults(new String[] {replDumpId}); - - run("SELECT * from " + dbName + "_dupe.unptned"); - verifyResults(unptn_data); - run("SELECT a from " + dbName + "_dupe.ptned WHERE b=1"); - verifyResults(ptn_data_1); - // Since partition(b=2) changed manually, Hive cannot find - // it in original location and cmroot, thus empty - run("SELECT a from " + dbName + "_dupe.ptned WHERE b=2"); - verifyResults(empty); - run("SELECT a from " + dbName + ".ptned_empty"); - verifyResults(empty); - run("SELECT * from " + dbName + ".unptned_empty"); - verifyResults(empty); - } - - @Test public void testIncrementalAdds() throws IOException { String testName = "incrementalAdds"; LOG.info("Testing "+testName); @@ -402,6 +319,7 @@ public void testDrops() throws IOException { run("LOAD DATA LOCAL INPATH '" + ptn_locn_2 + "' OVERWRITE INTO TABLE " + dbName + ".ptned3 PARTITION(b=2)"); verifySetup("SELECT a from " + dbName + ".ptned2 WHERE b=2", ptn_data_2); + // At this point, we've set up all the tables and ptns we're going to test drops across // Replicate it first, and then we'll drop it on the source. @@ -475,132 +393,6 @@ public void testDrops() throws IOException { } @Test - public void testDropsWithCM() throws IOException { - - String testName = "drops_with_cm"; - LOG.info("Testing "+testName); - String dbName = testName + "_" + tid; - - run("CREATE DATABASE " + dbName); - run("CREATE TABLE " + dbName + ".unptned(a string) STORED AS TEXTFILE"); - run("CREATE TABLE " + dbName + ".ptned(a string) partitioned by (b string) STORED AS TEXTFILE"); - run("CREATE TABLE " + dbName + ".ptned2(a string) partitioned by (b string) STORED AS TEXTFILE"); - - String[] unptn_data = new String[]{ "eleven" , "twelve" }; - String[] ptn_data_1 = new String[]{ "thirteen", "fourteen", "fifteen"}; - String[] ptn_data_2 = new String[]{ "fifteen", "sixteen", "seventeen"}; - String[] empty = new String[]{}; - - String unptn_locn = new Path(TEST_PATH , testName + "_unptn").toUri().getPath(); - String ptn_locn_1 = new Path(TEST_PATH , testName + "_ptn1").toUri().getPath(); - String ptn_locn_2 = new Path(TEST_PATH , testName + "_ptn2").toUri().getPath(); - - createTestDataFile(unptn_locn, unptn_data); - createTestDataFile(ptn_locn_1, ptn_data_1); - createTestDataFile(ptn_locn_2, ptn_data_2); - - run("LOAD DATA LOCAL INPATH '" + unptn_locn + "' OVERWRITE INTO TABLE " + dbName + ".unptned"); - run("SELECT * from " + dbName + ".unptned"); - verifyResults(unptn_data); - run("LOAD DATA LOCAL INPATH '" + ptn_locn_1 + "' OVERWRITE INTO TABLE " + dbName + ".ptned PARTITION(b='1')"); - run("SELECT a from " + dbName + ".ptned WHERE b='1'"); - verifyResults(ptn_data_1); - run("LOAD DATA LOCAL INPATH '" + ptn_locn_2 + "' OVERWRITE INTO TABLE " + dbName + ".ptned PARTITION(b='2')"); - run("SELECT a from " + dbName + ".ptned WHERE b='2'"); - verifyResults(ptn_data_2); - run("LOAD DATA LOCAL INPATH '" + ptn_locn_1 + "' OVERWRITE INTO TABLE " + dbName + ".ptned2 PARTITION(b='1')"); - run("SELECT a from " + dbName + ".ptned2 WHERE b='1'"); - verifyResults(ptn_data_1); - run("LOAD DATA LOCAL INPATH '" + ptn_locn_2 + "' OVERWRITE INTO TABLE " + 
dbName + ".ptned2 PARTITION(b='2')"); - run("SELECT a from " + dbName + ".ptned2 WHERE b='2'"); - verifyResults(ptn_data_2); - - advanceDumpDir(); - run("REPL DUMP " + dbName); - String replDumpLocn = getResult(0,0); - String replDumpId = getResult(0,1,true); - run("EXPLAIN REPL LOAD " + dbName + "_dupe FROM '" + replDumpLocn + "'"); - printOutput(); - run("REPL LOAD " + dbName + "_dupe FROM '" + replDumpLocn + "'"); - - run("REPL STATUS " + dbName + "_dupe"); - verifyResults(new String[] {replDumpId}); - - run("SELECT * from " + dbName + "_dupe.unptned"); - verifyResults(unptn_data); - run("SELECT a from " + dbName + "_dupe.ptned WHERE b='1'"); - verifyResults(ptn_data_1); - run("SELECT a from " + dbName + "_dupe.ptned WHERE b='2'"); - verifyResults(ptn_data_2); - run("SELECT a from " + dbName + "_dupe.ptned2 WHERE b='1'"); - verifyResults(ptn_data_1); - run("SELECT a from " + dbName + "_dupe.ptned2 WHERE b='2'"); - verifyResults(ptn_data_2); - - run("CREATE TABLE " + dbName + ".unptned_copy" + " AS SELECT a FROM " + dbName + ".unptned"); - run("CREATE TABLE " + dbName + ".ptned_copy" + " LIKE " + dbName + ".ptned"); - run("INSERT INTO TABLE " + dbName + ".ptned_copy" + " PARTITION(b='1') SELECT a FROM " + - dbName + ".ptned WHERE b='1'"); - run("SELECT a from " + dbName + ".unptned_copy"); - verifyResults(unptn_data); - run("SELECT a from " + dbName + ".ptned_copy"); - verifyResults(ptn_data_1); - - run("DROP TABLE " + dbName + ".unptned"); - run("ALTER TABLE " + dbName + ".ptned DROP PARTITION (b='2')"); - run("DROP TABLE " + dbName + ".ptned2"); - run("SELECT a from " + dbName + ".ptned WHERE b=2"); - verifyResults(empty); - run("SELECT a from " + dbName + ".ptned"); - verifyResults(ptn_data_1); - - advanceDumpDir(); - run("REPL DUMP " + dbName + " FROM " + replDumpId); - String postDropReplDumpLocn = getResult(0,0); - String postDropReplDumpId = getResult(0,1,true); - LOG.info("Dumped to {} with id {}->{}", postDropReplDumpLocn, replDumpId, postDropReplDumpId); - - // Drop table after dump - run("DROP TABLE " + dbName + ".unptned_copy"); - // Drop partition after dump - run("ALTER TABLE " + dbName + ".ptned_copy DROP PARTITION(b='1')"); - - run("EXPLAIN REPL LOAD " + dbName + "_dupe FROM '" + postDropReplDumpLocn + "'"); - printOutput(); - run("REPL LOAD " + dbName + "_dupe FROM '" + postDropReplDumpLocn + "'"); - - Exception e = null; - try { - Table tbl = metaStoreClient.getTable(dbName + "_dupe", "unptned"); - assertNull(tbl); - } catch (TException te) { - e = te; - } - assertNotNull(e); - assertEquals(NoSuchObjectException.class, e.getClass()); - - run("SELECT a from " + dbName + "_dupe.ptned WHERE b=2"); - verifyResults(empty); - run("SELECT a from " + dbName + "_dupe.ptned"); - verifyResults(ptn_data_1); - - Exception e2 = null; - try { - Table tbl = metaStoreClient.getTable(dbName+"_dupe","ptned2"); - assertNull(tbl); - } catch (TException te) { - e2 = te; - } - assertNotNull(e2); - assertEquals(NoSuchObjectException.class, e.getClass()); - - run("SELECT a from " + dbName + "_dupe.unptned_copy"); - verifyResults(unptn_data); - run("SELECT a from " + dbName + "_dupe.ptned_copy"); - verifyResults(ptn_data_1); - } - - @Test public void testAlters() throws IOException { String testName = "alters"; diff --git itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java index 4a82aa5..5fac14f 100644 --- itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java +++ 
itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java @@ -1808,17 +1808,6 @@ public void testResultSetMetaDataDuplicateColumnNames() throws SQLException { stmt.close(); } - @Test - public void testResultSetRowProperties() throws SQLException { - Statement stmt = con.createStatement(); - ResultSet res = - stmt.executeQuery("select * from " - + dataTypeTableName + " limit 1"); - assertFalse(res.rowDeleted()); - assertFalse(res.rowInserted()); - assertFalse(res.rowUpdated()); - } - // [url] [host] [port] [db] private static final String[][] URL_PROPERTIES = new String[][] { // binary mode diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 15e0db4..e966959 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -72,6 +72,8 @@ minillap.shared.query.files=insert_into1.q,\ orc_merge_diff_fs.q,\ unionDistinct_1.q,\ union_type_chk.q,\ + orc_ppd_basic.q,\ + orc_ppd_schema_evol_3a.q,\ cte_2.q,\ cte_4.q,\ llap_nullscan.q,\ @@ -609,7 +611,6 @@ minillaplocal.query.files=acid_globallimit.q,\ vector_udf1.q,\ vectorization_short_regress.q,\ vectorized_dynamic_partition_pruning.q,\ - vectorized_dynamic_semijoin_reduction.q,\ vectorized_ptf.q,\ windowing.q,\ windowing_gby.q,\ diff --git jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java index 6d4b2b1..93f093f 100644 --- jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java +++ jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java @@ -658,15 +658,15 @@ public boolean relative(int rows) throws SQLException { } public boolean rowDeleted() throws SQLException { - return false; + throw new SQLException("Method not supported"); } public boolean rowInserted() throws SQLException { - return false; + throw new SQLException("Method not supported"); } public boolean rowUpdated() throws SQLException { - return false; + throw new SQLException("Method not supported"); } public void setFetchDirection(int direction) throws SQLException { diff --git jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java index 2a68c91..bc49aeb 100644 --- jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java +++ jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java @@ -1,20 +1,3 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
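With the HiveBaseResultSet change above, rowDeleted(), rowInserted() and rowUpdated() now throw SQLException("Method not supported") instead of returning a hard-coded false, and the TestJdbcDriver2 test that asserted the old behaviour is removed. A minimal caller-side sketch of coping with that, using only standard JDBC types; the class and method names here are hypothetical and not part of this patch:

import java.sql.ResultSet;
import java.sql.SQLException;

public class RowVisibilityProbe {
  // Hive's driver now signals "Method not supported" rather than returning false,
  // so generic JDBC code that probes row-change visibility should guard the call.
  static boolean rowUpdatedOrFalse(ResultSet rs) {
    try {
      return rs.rowUpdated();
    } catch (SQLException unsupported) {
      return false;
    }
  }

  static boolean rowDeletedOrFalse(ResultSet rs) {
    try {
      return rs.rowDeleted();
    } catch (SQLException unsupported) {
      return false;
    }
  }
}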
- */ package org.apache.hive.jdbc; import static org.junit.Assert.assertEquals; diff --git llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java index eba4401..169be22 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java @@ -196,8 +196,8 @@ private void run(String[] args) throws Exception { final FileSystem fs = FileSystem.get(conf); final FileSystem lfs = FileSystem.getLocal(conf).getRawFileSystem(); - int threadCount = Math.max(1, Runtime.getRuntime().availableProcessors() / 2); - final ExecutorService executor = Executors.newFixedThreadPool(threadCount, + final ExecutorService executor = + Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() / 2, new ThreadFactoryBuilder().setNameFormat("llap-pkg-%d").build()); final CompletionService asyncRunner = new ExecutorCompletionService(executor); diff --git llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/AMReporter.java llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/AMReporter.java index 93237e6..027d8eb 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/AMReporter.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/AMReporter.java @@ -40,8 +40,6 @@ import java.util.concurrent.Delayed; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -92,6 +90,8 @@ Ignore exceptions when communicating with the AM. At a later point, report back saying the AM is dead so that tasks can be removed from the running queue. + Use a cachedThreadPool so that a few AMs going down does not affect other AppMasters. + Race: When a task completes - it sends out it's message via the regular TaskReporter. The AM after this may run another DAG, or may die. This may need to be consolidated with the LlapTaskReporter. Try ensuring there's no race between the two. @@ -118,23 +118,15 @@ volatile ListenableFuture queueLookupFuture; private final DaemonId daemonId; - public AMReporter(int numExecutors, int maxThreads, AtomicReference - localAddress, QueryFailedHandler queryFailedHandler, Configuration conf, DaemonId daemonId) { + public AMReporter(AtomicReference localAddress, + QueryFailedHandler queryFailedHandler, Configuration conf, DaemonId daemonId) { super(AMReporter.class.getName()); this.localAddress = localAddress; this.queryFailedHandler = queryFailedHandler; this.conf = conf; this.daemonId = daemonId; - if (maxThreads < numExecutors) { - maxThreads = numExecutors; - LOG.warn("maxThreads={} is less than numExecutors={}. 
Setting maxThreads=numExecutors", - maxThreads, numExecutors); - } - ExecutorService rawExecutor = - new ThreadPoolExecutor(numExecutors, maxThreads, - 60L, TimeUnit.SECONDS, - new LinkedBlockingQueue(), - new ThreadFactoryBuilder().setDaemon(true).setNameFormat("AMReporter %d").build()); + ExecutorService rawExecutor = Executors.newCachedThreadPool( + new ThreadFactoryBuilder().setDaemon(true).setNameFormat("AMReporter %d").build()); this.executor = MoreExecutors.listeningDecorator(rawExecutor); ExecutorService rawExecutor2 = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("AMReporterQueueDrainer").build()); diff --git llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java index 8c33fa2..6d7d4de 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java @@ -55,6 +55,7 @@ import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.VertexOrBinary; import org.apache.hadoop.hive.llap.metrics.LlapDaemonExecutorMetrics; import org.apache.hadoop.hive.llap.security.LlapSignerImpl; +import org.apache.hadoop.hive.llap.security.LlapTokenIdentifier; import org.apache.hadoop.hive.llap.tez.Converters; import org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils; import org.apache.hadoop.io.DataInputBuffer; @@ -182,16 +183,14 @@ public SubmitWorkResponseProto submitWork(SubmitWorkRequestProto request) throws SignableVertexSpec vertex = extractVertexSpec(request, tokenInfo); TezEvent initialEvent = extractInitialEvent(request, tokenInfo); + if (LOG.isInfoEnabled()) { + LOG.info("Queueing container for execution: " + stringifySubmitRequest(request, vertex)); + } + QueryIdentifierProto qIdProto = vertex.getQueryIdentifier(); TezTaskAttemptID attemptId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(), vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber()); String fragmentIdString = attemptId.toString(); - if (LOG.isInfoEnabled()) { - LOG.info("Queueing container for execution: fragemendId={}, {}", - fragmentIdString, stringifySubmitRequest(request, vertex)); - } - QueryIdentifierProto qIdProto = vertex.getQueryIdentifier(); - HistoryLogger.logFragmentStart(qIdProto.getApplicationIdString(), request.getContainerIdString(), localAddress.get().getHostName(), vertex.getDagName(), qIdProto.getDagIndex(), vertex.getVertexName(), request.getFragmentNumber(), request.getAttemptNumber()); @@ -479,7 +478,11 @@ public void fragmentComplete(QueryFragmentInfo fragmentInfo) { public void queryFailed(QueryIdentifier queryIdentifier) { LOG.info("Processing query failed notification for {}", queryIdentifier); List knownFragments; - knownFragments = queryTracker.getRegisteredFragments(queryIdentifier); + try { + knownFragments = queryTracker.queryComplete(queryIdentifier, -1, true); + } catch (IOException e) { + throw new RuntimeException(e); // Should never happen here, no permission check. 
+ } LOG.info("DBG: Pending fragment count for failed query {} = {}", queryIdentifier, knownFragments.size()); for (QueryFragmentInfo fragmentInfo : knownFragments) { diff --git llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java index cca6bc6..519bfbd 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java @@ -254,9 +254,7 @@ public LlapDaemon(Configuration daemonConf, int numExecutors, long executorMemor LOG.info("Started LlapMetricsSystem with displayName: " + displayName + " sessionId: " + sessionId); - int maxAmReporterThreads = HiveConf.getIntVar(daemonConf, ConfVars.LLAP_DAEMON_AM_REPORTER_MAX_THREADS); - this.amReporter = new AMReporter(numExecutors, maxAmReporterThreads, srvAddress, - new QueryFailedHandlerProxy(), daemonConf, daemonId); + this.amReporter = new AMReporter(srvAddress, new QueryFailedHandlerProxy(), daemonConf, daemonId); SecretManager sm = null; if (UserGroupInformation.isSecurityEnabled()) { diff --git llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java index 9eaddd2..a7d7981 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java @@ -25,7 +25,6 @@ import org.apache.hadoop.hive.llap.log.LogHelpers; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.util.StringUtils; import org.apache.log4j.MDC; import org.apache.logging.slf4j.Log4jMarker; import org.apache.tez.common.CallableWithNdc; @@ -142,17 +141,14 @@ QueryFragmentInfo registerFragment(QueryIdentifier queryIdentifier, String appId String fragmentIdString, LlapTokenInfo tokenInfo) throws IOException { ReadWriteLock dagLock = getDagLock(queryIdentifier); - // Note: This is a readLock to prevent a race with queryComplete. Operations - // and mutations within this lock need to be on concurrent structures. dagLock.readLock().lock(); try { if (completedDagMap.contains(queryIdentifier)) { // Cleanup the dag lock here, since it may have been created after the query completed dagSpecificLocks.remove(queryIdentifier); - String message = "Dag " + dagName + " already complete. Rejecting fragment [" - + vertexName + ", " + fragmentNumber + ", " + attemptNumber + "]"; - LOG.info(message); - throw new RuntimeException(message); + throw new RuntimeException( + "Dag " + dagName + " already complete. Rejecting fragment [" + + vertexName + ", " + fragmentNumber + ", " + attemptNumber + "]"); } // TODO: for now, we get the secure username out of UGI... after signing, we can take it // out of the request provided that it's signed. 
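The AMReporter and LlapDaemon hunks above replace the bounded ThreadPoolExecutor (sized from numExecutors/maxThreads) with Executors.newCachedThreadPool, per the added comment about a few AMs going down. A self-contained sketch of that executor setup, assuming Guava's ThreadFactoryBuilder as in the surrounding code; the class name and the submitted task are illustrative only:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import com.google.common.util.concurrent.ThreadFactoryBuilder;

public class CachedReporterPool {
  public static void main(String[] args) {
    // Unbounded cached pool with daemon threads, mirroring the new AMReporter setup:
    // threads stuck talking to one dead or slow AM do not exhaust a fixed-size pool
    // and so do not block reporting to the remaining AppMasters.
    ExecutorService reporterPool = Executors.newCachedThreadPool(
        new ThreadFactoryBuilder().setDaemon(true).setNameFormat("AMReporter %d").build());
    reporterPool.submit(() -> {
      // placeholder for the per-node task-status / heartbeat RPC to one AM
    });
    reporterPool.shutdown();
  }
}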
@@ -215,22 +211,6 @@ void fragmentComplete(QueryFragmentInfo fragmentInfo) { } } - List getRegisteredFragments(QueryIdentifier queryIdentifier) { - ReadWriteLock dagLock = getDagLock(queryIdentifier); - dagLock.readLock().lock(); - try { - QueryInfo queryInfo = queryInfoMap.get(queryIdentifier); - if (queryInfo == null) { - // Race with queryComplete - LOG.warn("Unknown query: Returning an empty list of fragments"); - return Collections.emptyList(); - } - return queryInfo.getRegisteredFragments(); - } finally { - dagLock.readLock().unlock(); - } - } - /** * Register completion for a query * @param queryIdentifier @@ -251,7 +231,8 @@ void fragmentComplete(QueryFragmentInfo fragmentInfo) { deleteDelay); queryInfoMap.remove(queryIdentifier); if (queryInfo == null) { - // Should not happen. + // One case where this happens is when a query is killed via an explicit signal, and then + // another message is received from teh AMHeartbeater. LOG.warn("Ignoring query complete for unknown dag: {}", queryIdentifier); return Collections.emptyList(); } diff --git llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/comparator/ShortestJobFirstComparator.java llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/comparator/ShortestJobFirstComparator.java index 9b6c894..238ae9e 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/comparator/ShortestJobFirstComparator.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/comparator/ShortestJobFirstComparator.java @@ -54,22 +54,17 @@ public int compare(TaskWrapper t1, TaskWrapper t2) { // it's parent hierarchy. selfAndUpstreamComplete indicates how many of these have completed. int knownPending1 = fri1.getNumSelfAndUpstreamTasks() - fri1.getNumSelfAndUpstreamCompletedTasks(); int knownPending2 = fri2.getNumSelfAndUpstreamTasks() - fri2.getNumSelfAndUpstreamCompletedTasks(); - // longer the wait time for an attempt wrt to its start time, higher the priority it gets - long waitTime1 = fri1.getCurrentAttemptStartTime() - fri1.getFirstAttemptStartTime(); - long waitTime2 = fri2.getCurrentAttemptStartTime() - fri2.getFirstAttemptStartTime(); - - if (waitTime1 == 0 || waitTime2 == 0) { - return knownPending1 - knownPending2; + if (knownPending1 < knownPending2) { + return -1; + } else if (knownPending1 > knownPending2) { + return 1; } - double ratio1 = (double) knownPending1 / (double) waitTime1; - double ratio2 = (double) knownPending2 / (double) waitTime2; - if (ratio1 < ratio2) { + if (fri1.getFirstAttemptStartTime() < fri2.getFirstAttemptStartTime()) { return -1; - } else if (ratio1 > ratio2) { + } else if (fri1.getFirstAttemptStartTime() > fri2.getFirstAttemptStartTime()) { return 1; } - return 0; } } diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java index 9ef9ca4..2f79828 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.llap.io.api.impl; -import java.util.ArrayList; - import java.io.IOException; import java.util.LinkedList; import java.util.List; @@ -100,6 +98,7 @@ public LlapRecordReader(JobConf job, FileSplit split, List includedCols this.executor = executor; this.jobConf = job; this.split = split; + this.columnIds = includedCols; this.sarg = ConvertAstToSearchArg.createFromConf(job); this.columnNames = 
ColumnProjectionUtils.getReadColumnNames(job); final String fragmentId = LlapTezUtils.getFragmentId(job); @@ -123,13 +122,12 @@ public LlapRecordReader(JobConf job, FileSplit split, List includedCols rbCtx = ctx != null ? ctx : LlapInputFormat.createFakeVrbCtx(mapWork); if (includedCols == null) { // Assume including everything means the VRB will have everything. - includedCols = new ArrayList<>(rbCtx.getRowColumnTypeInfos().length); - for (int i = 0; i < rbCtx.getRowColumnTypeInfos().length; ++i) { - includedCols.add(i); - } + this.columnCount = rbCtx.getRowColumnTypeInfos().length; + } else { + this.columnCount = columnIds.size(); } - this.columnIds = includedCols; - this.columnCount = columnIds.size(); + + int partitionColumnCount = rbCtx.getPartitionColumnCount(); if (partitionColumnCount > 0) { @@ -171,7 +169,8 @@ private boolean checkOrcSchemaEvolution() { SchemaEvolution schemaEvolution = new SchemaEvolution( fileSchema, rp.getReaderSchema(), includedColumns); for (int i = 0; i < columnCount; ++i) { - if (!schemaEvolution.isPPDSafeConversion(columnIds.get(i))) { + int colId = columnIds == null ? i : columnIds.get(i); + if (!schemaEvolution.isPPDSafeConversion(colId)) { LlapIoImpl.LOG.warn("Unsupported schema evolution! Disabling Llap IO for {}", split); return false; } @@ -215,7 +214,7 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti // VRB was created from VrbCtx, so we already have pre-allocated column vectors for (int i = 0; i < cvb.cols.length; ++i) { // Return old CVs (if any) to caller. We assume these things all have the same schema. - cvb.swapColumnVector(i, value.cols, columnIds.get(i)); + cvb.swapColumnVector(i, value.cols, columnIds == null ? i : columnIds.get(i)); } value.selectedInUse = false; value.size = cvb.size; diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/LineRrOffsetReader.java llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/LineRrOffsetReader.java index 3fc1fa2..1656381 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/LineRrOffsetReader.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/LineRrOffsetReader.java @@ -1,20 +1,3 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
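In the LlapRecordReader hunks above, a null includedCols list is no longer expanded into an explicit 0..n-1 list; the reader now treats null as "read every column" and maps batch position i directly to column i. A small sketch of that mapping rule, using a hypothetical helper rather than the real Hive classes:

import java.util.Arrays;
import java.util.List;

public class ProjectedColumnMapping {
  // Null projection list means "all columns": position i maps straight to column i;
  // otherwise the explicit column-id list is consulted, as before this change.
  static int columnIdAt(List<Integer> columnIds, int position) {
    return columnIds == null ? position : columnIds.get(position);
  }

  public static void main(String[] args) {
    System.out.println(columnIdAt(null, 3));                    // 3 (identity mapping)
    System.out.println(columnIdAt(Arrays.asList(5, 7, 9), 1));  // 7 (explicit projection)
  }
}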
- */ package org.apache.hadoop.hive.llap.io.encoded; import java.io.IOException; diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java index ba2b52d..43ff991 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java @@ -1,20 +1,3 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ package org.apache.hadoop.hive.llap.io.encoded; import java.io.IOException; diff --git llap-server/src/main/resources/package.py llap-server/src/main/resources/package.py index 62a8d08..380c6a8 100644 --- llap-server/src/main/resources/package.py +++ llap-server/src/main/resources/package.py @@ -85,7 +85,7 @@ def main(args): parser.add_argument("--args", default="") parser.add_argument("--name", default="llap0") parser.add_argument("--loglevel", default="INFO") - parser.add_argument("--logger", default="query-routing") + parser.add_argument("--logger", default="RFA") parser.add_argument("--chaosmonkey", type=int, default=0) parser.add_argument("--slider-am-container-mb", type=int, default=1024) parser.add_argument("--slider-appconfig-global", nargs='*', type=slider_appconfig_global_property, action='append') diff --git llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorTestHelpers.java llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorTestHelpers.java index 2cd6542..73bb68e 100644 --- llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorTestHelpers.java +++ llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorTestHelpers.java @@ -47,10 +47,10 @@ private static final Logger LOG = LoggerFactory.getLogger(TestTaskExecutorService.class); - public static MockRequest createMockRequest(int fragmentNum, int parallelism, long firstAttemptStartTime, - long currentAttemptStartTime, boolean canFinish, long workTime) { + public static MockRequest createMockRequest(int fragmentNum, int parallelism, long startTime, + boolean canFinish, long workTime) { SubmitWorkRequestProto - request = createSubmitWorkRequestProto(fragmentNum, parallelism, firstAttemptStartTime, currentAttemptStartTime); + request = createSubmitWorkRequestProto(fragmentNum, parallelism, startTime); return createMockRequest(canFinish, workTime, request); } @@ -83,16 +83,16 @@ public static QueryInfo createQueryInfo() { } public static SubmitWorkRequestProto createSubmitWorkRequestProto( - int fragmentNumber, int selfAndUpstreamParallelism, long firstAttemptStartTime, - long currentAttemptStartTime) { - return createSubmitWorkRequestProto(fragmentNumber, 
selfAndUpstreamParallelism, 0, firstAttemptStartTime, - currentAttemptStartTime, 1); + int fragmentNumber, int selfAndUpstreamParallelism, + long attemptStartTime) { + return createSubmitWorkRequestProto(fragmentNumber, selfAndUpstreamParallelism, 0, + attemptStartTime, 1); } public static SubmitWorkRequestProto createSubmitWorkRequestProto( int fragmentNumber, int selfAndUpstreamParallelism, - int selfAndUpstreamComplete, long firstAttemptStartTime, - long currentAttemptStartTime, int withinDagPriority) { + int selfAndUpstreamComplete, + long attemptStartTime, int withinDagPriority) { ApplicationId appId = ApplicationId.newInstance(9999, 72); TezDAGID dagId = TezDAGID.getInstance(appId, 1); TezVertexID vId = TezVertexID.getInstance(dagId, 35); @@ -124,8 +124,7 @@ public static SubmitWorkRequestProto createSubmitWorkRequestProto( .setFragmentRuntimeInfo(LlapDaemonProtocolProtos .FragmentRuntimeInfo .newBuilder() - .setFirstAttemptStartTime(firstAttemptStartTime) - .setCurrentAttemptStartTime(currentAttemptStartTime) + .setFirstAttemptStartTime(attemptStartTime) .setNumSelfAndUpstreamTasks(selfAndUpstreamParallelism) .setNumSelfAndUpstreamCompletedTasks(selfAndUpstreamComplete) .setWithinDagPriority(withinDagPriority) diff --git llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TestTaskExecutorService.java llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TestTaskExecutorService.java index de7f2fc..ac4e5f1 100644 --- llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TestTaskExecutorService.java +++ llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TestTaskExecutorService.java @@ -49,10 +49,10 @@ @Test(timeout = 5000) public void testPreemptionQueueComparator() throws InterruptedException { - TaskWrapper r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 2, 100, 200), false, 100000); - TaskWrapper r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 4, 200, 300), false, 100000); - TaskWrapper r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 6, 300, 400), false, 1000000); - TaskWrapper r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 8, 400, 500), false, 1000000); + TaskWrapper r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 2, 100), false, 100000); + TaskWrapper r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 4, 200), false, 100000); + TaskWrapper r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 6, 300), false, 1000000); + TaskWrapper r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 8, 400), false, 1000000); BlockingQueue queue = new PriorityBlockingQueue<>(4, new TaskExecutorService.PreemptionQueueComparator()); @@ -71,8 +71,8 @@ public void testPreemptionQueueComparator() throws InterruptedException { @Test(timeout = 10000) public void testFinishablePreeptsNonFinishable() throws InterruptedException { - MockRequest r1 = createMockRequest(1, 1, 100, 200, false, 5000l); - MockRequest r2 = createMockRequest(2, 1, 100, 200, true, 1000l); + MockRequest r1 = createMockRequest(1, 1, 100, false, 5000l); + MockRequest r2 = createMockRequest(2, 1, 100, true, 1000l); TaskExecutorServiceForTest taskExecutorService = new TaskExecutorServiceForTest(1, 2, ShortestJobFirstComparator.class.getName(), true); taskExecutorService.init(new Configuration()); @@ -110,7 +110,7 @@ public void testFinishablePreeptsNonFinishable() throws InterruptedException { @Test(timeout = 10000) public void testPreemptionStateOnTaskMoveToFinishableState() throws InterruptedException { - MockRequest r1 = createMockRequest(1, 1, 100, 200, 
false, 20000l); + MockRequest r1 = createMockRequest(1, 1, 100, false, 20000l); TaskExecutorServiceForTest taskExecutorService = new TaskExecutorServiceForTest(1, 2, ShortestJobFirstComparator.class.getName(), true); @@ -142,7 +142,7 @@ public void testPreemptionStateOnTaskMoveToFinishableState() throws InterruptedE @Test(timeout = 10000) public void testPreemptionStateOnTaskMoveToNonFinishableState() throws InterruptedException { - MockRequest r1 = createMockRequest(1, 1, 100, 200, true, 20000l); + MockRequest r1 = createMockRequest(1, 1, 100, true, 20000l); TaskExecutorServiceForTest taskExecutorService = new TaskExecutorServiceForTest(1, 2, ShortestJobFirstComparator.class.getName(), true); @@ -176,11 +176,11 @@ public void testPreemptionStateOnTaskMoveToNonFinishableState() throws Interrupt @Test(timeout = 10000) public void testWaitQueuePreemption() throws InterruptedException { - MockRequest r1 = createMockRequest(1, 1, 100, 200, true, 20000l); - MockRequest r2 = createMockRequest(2, 1, 200, 330, false, 20000l); - MockRequest r3 = createMockRequest(3, 1, 300, 420, false, 20000l); - MockRequest r4 = createMockRequest(4, 1, 400, 510, false, 20000l); - MockRequest r5 = createMockRequest(5, 1, 500, 610, true, 20000l); + MockRequest r1 = createMockRequest(1, 1, 100, true, 20000l); + MockRequest r2 = createMockRequest(2, 1, 200, false, 20000l); + MockRequest r3 = createMockRequest(3, 1, 300, false, 20000l); + MockRequest r4 = createMockRequest(4, 1, 400, false, 20000l); + MockRequest r5 = createMockRequest(5, 1, 500, true, 20000l); TaskExecutorServiceForTest taskExecutorService = new TaskExecutorServiceForTest(1, 2, ShortestJobFirstComparator.class.getName(), true); diff --git llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/comparator/TestShortestJobFirstComparator.java llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/comparator/TestShortestJobFirstComparator.java index e82f756..f50c657 100644 --- llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/comparator/TestShortestJobFirstComparator.java +++ llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/comparator/TestShortestJobFirstComparator.java @@ -28,11 +28,11 @@ @Test(timeout = 60000) public void testWaitQueueComparator() throws InterruptedException { - TaskWrapper r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 2, 100, 200), false, 100000); - TaskWrapper r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 4, 200, 300), false, 100000); - TaskWrapper r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 6, 300, 400), false, 1000000); - TaskWrapper r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 8, 400, 500), false, 1000000); - TaskWrapper r5 = createTaskWrapper(createSubmitWorkRequestProto(5, 10, 500, 600), false, 1000000); + TaskWrapper r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 2, 100), false, 100000); + TaskWrapper r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 4, 200), false, 100000); + TaskWrapper r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 6, 300), false, 1000000); + TaskWrapper r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 8, 400), false, 1000000); + TaskWrapper r5 = createTaskWrapper(createSubmitWorkRequestProto(5, 10, 500), false, 1000000); EvictingPriorityBlockingQueue queue = new EvictingPriorityBlockingQueue<>( new ShortestJobFirstComparator(), 4); assertNull(queue.offer(r1)); @@ -50,11 +50,11 @@ public void testWaitQueueComparator() throws InterruptedException { assertEquals(r3, queue.take()); assertEquals(r4, queue.take()); 
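The ShortestJobFirstComparator hunk earlier in this patch drops the wait-time "aging" ratio, which is why the createSubmitWorkRequestProto calls in these tests lose the currentAttemptStartTime argument. A small self-contained sketch of the resulting ordering, using a hypothetical Task holder rather than the real FragmentRuntimeInfo proto:

import java.util.Comparator;

public class ShortestJobFirstOrderSketch {
  static final class Task {
    final int selfAndUpstream;
    final int selfAndUpstreamCompleted;
    final long firstAttemptStartTime;

    Task(int selfAndUpstream, int selfAndUpstreamCompleted, long firstAttemptStartTime) {
      this.selfAndUpstream = selfAndUpstream;
      this.selfAndUpstreamCompleted = selfAndUpstreamCompleted;
      this.firstAttemptStartTime = firstAttemptStartTime;
    }

    int knownPending() {
      return selfAndUpstream - selfAndUpstreamCompleted;
    }
  }

  // Fewer known pending upstream tasks first; ties broken by the earlier
  // first-attempt start time. No wait-time ratio is involved any more.
  static final Comparator<Task> SHORTEST_JOB_FIRST =
      Comparator.comparingInt(Task::knownPending)
          .thenComparingLong(t -> t.firstAttemptStartTime);

  public static void main(String[] args) {
    Task threePending = new Task(10, 7, 400);
    Task sevenPending = new Task(10, 3, 100);
    // Prints a negative number: threePending sorts first despite its later start time.
    System.out.println(SHORTEST_JOB_FIRST.compare(threePending, sevenPending));
  }
}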
- r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 2, 100, 200), true, 100000); - r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 4, 200, 300), true, 100000); - r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 6, 300, 400), true, 1000000); - r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 8, 400, 500), true, 1000000); - r5 = createTaskWrapper(createSubmitWorkRequestProto(5, 10, 500, 600), true, 1000000); + r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 2, 100), true, 100000); + r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 4, 200), true, 100000); + r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 6, 300), true, 1000000); + r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 8, 400), true, 1000000); + r5 = createTaskWrapper(createSubmitWorkRequestProto(5, 10, 500), true, 1000000); queue = new EvictingPriorityBlockingQueue( new ShortestJobFirstComparator(), 4); assertNull(queue.offer(r1)); @@ -72,11 +72,11 @@ public void testWaitQueueComparator() throws InterruptedException { assertEquals(r3, queue.take()); assertEquals(r4, queue.take()); - r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 1, 100, 1000), true, 100000); - r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 1, 200, 900), false, 100000); - r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 1, 300, 800), true, 1000000); - r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 1, 400, 700), false, 1000000); - r5 = createTaskWrapper(createSubmitWorkRequestProto(5, 10, 500, 600), true, 1000000); + r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 1, 100), true, 100000); + r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 1, 200), false, 100000); + r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 1, 300), true, 1000000); + r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 1, 400), false, 1000000); + r5 = createTaskWrapper(createSubmitWorkRequestProto(5, 10, 500), true, 1000000); queue = new EvictingPriorityBlockingQueue( new ShortestJobFirstComparator(), 4); assertNull(queue.offer(r1)); @@ -94,11 +94,11 @@ public void testWaitQueueComparator() throws InterruptedException { assertEquals(r5, queue.take()); assertEquals(r2, queue.take()); - r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 2, 100, 200), true, 100000); - r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 4, 200, 300), false, 100000); - r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 6, 300, 400), true, 1000000); - r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 8, 400, 500), false, 1000000); - r5 = createTaskWrapper(createSubmitWorkRequestProto(5, 10, 500, 600), true, 1000000); + r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 2, 100), true, 100000); + r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 4, 200), false, 100000); + r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 6, 300), true, 1000000); + r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 8, 400), false, 1000000); + r5 = createTaskWrapper(createSubmitWorkRequestProto(5, 10, 500), true, 1000000); queue = new EvictingPriorityBlockingQueue( new ShortestJobFirstComparator(), 4); assertNull(queue.offer(r1)); @@ -116,11 +116,11 @@ public void testWaitQueueComparator() throws InterruptedException { assertEquals(r5, queue.take()); assertEquals(r2, queue.take()); - r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 2, 100, 200), true, 100000); - r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 4, 200, 300), false, 100000); - r3 = 
createTaskWrapper(createSubmitWorkRequestProto(3, 6, 300, 400), false, 1000000); - r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 8, 400, 500), false, 1000000); - r5 = createTaskWrapper(createSubmitWorkRequestProto(5, 10, 500, 600), true, 1000000); + r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 2, 100), true, 100000); + r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 4, 200), false, 100000); + r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 6, 300), false, 1000000); + r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 8, 400), false, 1000000); + r5 = createTaskWrapper(createSubmitWorkRequestProto(5, 10, 500), true, 1000000); queue = new EvictingPriorityBlockingQueue( new ShortestJobFirstComparator(), 4); assertNull(queue.offer(r1)); @@ -138,11 +138,11 @@ public void testWaitQueueComparator() throws InterruptedException { assertEquals(r2, queue.take()); assertEquals(r3, queue.take()); - r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 2, 100, 200), false, 100000); - r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 4, 200, 300), true, 100000); - r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 6, 300, 400), true, 1000000); - r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 8, 400, 500), true, 1000000); - r5 = createTaskWrapper(createSubmitWorkRequestProto(5, 10, 500, 600), true, 1000000); + r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 2, 100), false, 100000); + r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 4, 200), true, 100000); + r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 6, 300), true, 1000000); + r4 = createTaskWrapper(createSubmitWorkRequestProto(4, 8, 400), true, 1000000); + r5 = createTaskWrapper(createSubmitWorkRequestProto(5, 10, 500), true, 1000000); queue = new EvictingPriorityBlockingQueue( new ShortestJobFirstComparator(), 4); assertNull(queue.offer(r1)); @@ -163,9 +163,9 @@ public void testWaitQueueComparator() throws InterruptedException { @Test(timeout = 60000) public void testWaitQueueComparatorWithinDagPriority() throws InterruptedException { - TaskWrapper r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 1, 0, 10, 100, 10), false, 100000); - TaskWrapper r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 1, 0, 10, 100, 1), false, 100000); - TaskWrapper r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 1, 0, 10, 100, 5), false, 100000); + TaskWrapper r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 1, 0, 100, 10), false, 100000); + TaskWrapper r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 1, 0, 100, 1), false, 100000); + TaskWrapper r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 1, 0, 100, 5), false, 100000); EvictingPriorityBlockingQueue queue = new EvictingPriorityBlockingQueue<>( new ShortestJobFirstComparator(), 4); @@ -181,9 +181,9 @@ public void testWaitQueueComparatorWithinDagPriority() throws InterruptedExcepti @Test(timeout = 60000) public void testWaitQueueComparatorParallelism() throws InterruptedException { - TaskWrapper r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 10, 3, 10, 100, 1), false, 100000); // 7 pending - TaskWrapper r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 10, 7, 10, 100, 1), false, 100000); // 3 pending - TaskWrapper r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 10, 5, 10, 100, 1), false, 100000); // 5 pending + TaskWrapper r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 10, 3, 100, 1), false, 100000); // 7 pending + TaskWrapper r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 10, 
7, 100, 1), false, 100000); // 3 pending + TaskWrapper r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 10, 5, 100, 1), false, 100000); // 5 pending EvictingPriorityBlockingQueue queue = new EvictingPriorityBlockingQueue<>( new ShortestJobFirstComparator(), 4); @@ -196,39 +196,4 @@ public void testWaitQueueComparatorParallelism() throws InterruptedException { assertEquals(r3, queue.take()); assertEquals(r1, queue.take()); } - - @Test(timeout = 60000) - public void testWaitQueueComparatorAging() throws InterruptedException { - TaskWrapper r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 10, 100, 200), true, 100000); - TaskWrapper r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 20, 100, 200), true, 100000); - TaskWrapper r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 30, 100, 200), true, 100000); - - EvictingPriorityBlockingQueue queue = new EvictingPriorityBlockingQueue<>( - new ShortestJobFirstComparator(), 4); - - assertNull(queue.offer(r1)); - assertNull(queue.offer(r2)); - assertNull(queue.offer(r3)); - - assertEquals(r1, queue.take()); - assertEquals(r2, queue.take()); - assertEquals(r3, queue.take()); - - // priority = 10 / (200 - 100) = 0.01 - r1 = createTaskWrapper(createSubmitWorkRequestProto(1, 10, 100, 200), true, 100000); - // priority = 20 / (3000 - 100) = 0.0069 - r2 = createTaskWrapper(createSubmitWorkRequestProto(2, 20, 100, 3000), true, 100000); - // priority = 30 / (4000 - 100) = 0.0076 - r3 = createTaskWrapper(createSubmitWorkRequestProto(3, 30, 100, 4000), true, 100000); - - queue = new EvictingPriorityBlockingQueue<>(new ShortestJobFirstComparator(), 4); - - assertNull(queue.offer(r1)); - assertNull(queue.offer(r2)); - assertNull(queue.offer(r3)); - - assertEquals(r2, queue.take()); - assertEquals(r3, queue.take()); - assertEquals(r1, queue.take()); - } } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/DefaultHiveMetaHook.java metastore/src/java/org/apache/hadoop/hive/metastore/DefaultHiveMetaHook.java new file mode 100644 index 0000000..0957945 --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/DefaultHiveMetaHook.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore; + +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; + +public abstract class DefaultHiveMetaHook implements HiveMetaHook { + /** + * Called after successfully after INSERT [OVERWRITE] statement is executed. 
+ * @param table table definition + * @param overwrite true if it is INSERT OVERWRITE + * + * @throws MetaException + */ + public abstract void commitInsertTable(Table table, boolean overwrite) throws MetaException; + + /** + * called before commit insert method is called + * @param table table definition + * @param overwrite true if it is INSERT OVERWRITE + * + * @throws MetaException + */ + public abstract void preInsertTable(Table table, boolean overwrite) throws MetaException; + + /** + * called in case pre commit or commit insert fail. + * @param table table definition + * @param overwrite true if it is INSERT OVERWRITE + * + * @throws MetaException + */ + public abstract void rollbackInsertTable(Table table, boolean overwrite) throws MetaException; +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index bae39ac..64d9fc1 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -729,7 +729,6 @@ private void updatePartColumnStatsForAlterColumns(RawStore msdb, Partition oldPa assert (partsColStats.size() <= 1); for (ColumnStatistics partColStats : partsColStats) { //actually only at most one loop List statsObjs = partColStats.getStatsObj(); - List deletedCols = new ArrayList(); for (ColumnStatisticsObj statsObj : statsObjs) { boolean found =false; for (FieldSchema newCol : newCols) { @@ -742,10 +741,8 @@ private void updatePartColumnStatsForAlterColumns(RawStore msdb, Partition oldPa if (!found) { msdb.deletePartitionColumnStatistics(dbName, tableName, oldPartName, partVals, statsObj.getColName()); - deletedCols.add(statsObj.getColName()); } } - StatsSetupConst.removeColumnStatsState(newPart.getParameters(), deletedCols); } } catch (NoSuchObjectException nsoe) { LOG.debug("Could not find db entry." + nsoe); @@ -830,7 +827,6 @@ void alterTableUpdateTableColumnStats(RawStore msdb, } else { List statsObjs = colStats.getStatsObj(); if (statsObjs != null) { - List deletedCols = new ArrayList(); for (ColumnStatisticsObj statsObj : statsObjs) { boolean found = false; for (FieldSchema newCol : newCols) { @@ -845,14 +841,11 @@ void alterTableUpdateTableColumnStats(RawStore msdb, if (!newDbName.equals(dbName) || !newTableName.equals(tableName)) { msdb.deleteTableColumnStatistics(dbName, tableName, statsObj.getColName()); newStatsObjs.add(statsObj); - deletedCols.add(statsObj.getColName()); } } else { msdb.deleteTableColumnStatistics(dbName, tableName, statsObj.getColName()); - deletedCols.add(statsObj.getColName()); } } - StatsSetupConst.removeColumnStatsState(newTable.getParameters(), deletedCols); } } } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaHookV2.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaHookV2.java deleted file mode 100644 index e691c1f..0000000 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaHookV2.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.metastore; - -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Table; - -public interface HiveMetaHookV2 extends HiveMetaHook { - /** - * Called after successfully after INSERT [OVERWRITE] statement is executed. - * @param table table definition - * @param overwrite true if it is INSERT OVERWRITE - * - * @throws MetaException - */ - public void commitInsertTable(Table table, boolean overwrite) throws MetaException; - - /** - * called before commit insert method is called - * @param table table definition - * @param overwrite true if it is INSERT OVERWRITE - * - * @throws MetaException - */ - public void preInsertTable(Table table, boolean overwrite) throws MetaException; - - /** - * called in case pre commit or commit insert fail. - * @param table table definition - * @param overwrite true if it is INSERT OVERWRITE - * - * @throws MetaException - */ - public void rollbackInsertTable(Table table, boolean overwrite) throws MetaException; -} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index b5d007d..c32104f 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -2219,14 +2219,15 @@ public void addDynamicPartitions(long txnId, String dbName, String tableName, public void insertTable(Table table, boolean overwrite) throws MetaException { boolean failed = true; HiveMetaHook hook = getHook(table); - if (hook == null || !(hook instanceof HiveMetaHookV2)) { + if (hook == null || !(hook instanceof DefaultHiveMetaHook)) { return; } - HiveMetaHookV2 hiveMetaHook = (HiveMetaHookV2) hook; + DefaultHiveMetaHook hiveMetaHook = (DefaultHiveMetaHook) hook; try { - hiveMetaHook.preInsertTable(table, overwrite); hiveMetaHook.commitInsertTable(table, overwrite); - } finally { + failed = false; + } + finally { if (failed) { hiveMetaHook.rollbackInsertTable(table, overwrite); } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java index 84ec332..a07c695 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java @@ -1509,7 +1509,7 @@ void addDynamicPartitions(long txnId, String dbName, String tableName, List1) { - result[1] = uriAndFragment[1]; - } - return result; - } - /** * Thread to clear old files of cmroot recursively */ @@ -309,28 +231,24 @@ public void run() { for (FileStatus file : files) { long modifiedTime = file.getModificationTime(); if (now - modifiedTime > secRetain*1000) { - try { - if (fs.getXAttrs(file.getPath()).containsKey(REMAIN_IN_TRASH_TAG)) { - boolean succ = Trash.moveToAppropriateTrash(fs, file.getPath(), hiveConf); - if (succ) { - if (LOG.isDebugEnabled()) { - LOG.debug("Move " + file.toString() + " to trash"); - } - } else { - LOG.warn("Fail 
to move " + file.toString() + " to trash"); + if (fs.getXAttrs(file.getPath()).containsKey(REMAIN_IN_TRASH_TAG)) { + boolean succ = Trash.moveToAppropriateTrash(fs, file.getPath(), hiveConf); + if (succ) { + if (LOG.isDebugEnabled()) { + LOG.debug("Move " + file.toString() + " to trash"); } } else { - boolean succ = fs.delete(file.getPath(), false); - if (succ) { - if (LOG.isDebugEnabled()) { - LOG.debug("Remove " + file.toString()); - } - } else { - LOG.warn("Fail to remove " + file.toString()); + LOG.warn("Fail to move " + file.toString() + " to trash"); + } + } else { + boolean succ = fs.delete(file.getPath(), false); + if (succ) { + if (LOG.isDebugEnabled()) { + LOG.debug("Remove " + file.toString()); } + } else { + LOG.warn("Fail to remove " + file.toString()); } - } catch (UnsupportedOperationException e) { - LOG.warn("Error getting xattr for " + file.getPath().toString()); } } } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java index c0518ad..805db34 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java @@ -1395,7 +1395,6 @@ public HeartbeatTxnRangeResponse heartbeatTxnRange(HeartbeatTxnRangeRequest rqst throws MetaException { try { Connection dbConn = null; - Statement stmt = null; HeartbeatTxnRangeResponse rsp = new HeartbeatTxnRangeResponse(); Set nosuch = new HashSet(); Set aborted = new HashSet(); @@ -1409,32 +1408,11 @@ public HeartbeatTxnRangeResponse heartbeatTxnRange(HeartbeatTxnRangeRequest rqst * would care about (which would have required SERIALIZABLE) */ dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED); - /*do fast path first (in 1 statement) if doesn't work, rollback and do the long version*/ - stmt = dbConn.createStatement(); - List queries = new ArrayList<>(); - int numTxnsToHeartbeat = (int) (rqst.getMax() - rqst.getMin() + 1); - List txnIds = new ArrayList<>(numTxnsToHeartbeat); - for (long txn = rqst.getMin(); txn <= rqst.getMax(); txn++) { - txnIds.add(txn); - } - TxnUtils.buildQueryWithINClause(conf, queries, - new StringBuilder("update TXNS set txn_last_heartbeat = " + getDbTime(dbConn) + - " where txn_state = " + quoteChar(TXN_OPEN) + " and "), - new StringBuilder(""), txnIds, "txn_id", true, false); - int updateCnt = 0; - for (String query : queries) { - LOG.debug("Going to execute update <" + query + ">"); - updateCnt += stmt.executeUpdate(query); - } - if (updateCnt == numTxnsToHeartbeat) { - //fast pass worked, i.e. 
all txns we were asked to heartbeat were Open as expected - dbConn.commit(); - return rsp; - } - //if here, do the slow path so that we can return info txns which were not in expected state - dbConn.rollback(); for (long txn = rqst.getMin(); txn <= rqst.getMax(); txn++) { try { + //todo: do all updates in 1 SQL statement and check update count + //if update count is less than was requested, go into more expensive checks + //for each txn heartbeatTxn(dbConn, txn); } catch (NoSuchTxnException e) { nosuch.add(txn); @@ -1450,7 +1428,7 @@ public HeartbeatTxnRangeResponse heartbeatTxnRange(HeartbeatTxnRangeRequest rqst throw new MetaException("Unable to select from transaction database " + StringUtils.stringifyException(e)); } finally { - close(null, stmt, dbConn); + closeDbConn(dbConn); } } catch (RetryException e) { return heartbeatTxnRange(rqst); diff --git packaging/src/main/assembly/bin.xml packaging/src/main/assembly/bin.xml index 84686ee..4327977 100644 --- packaging/src/main/assembly/bin.xml +++ packaging/src/main/assembly/bin.xml @@ -59,16 +59,6 @@ - lib - false - false - true - - org.apache.hive.hcatalog:hive-hcatalog-core - org.apache.hive.hcatalog:hive-hcatalog-server-extensions - - - hcatalog/share/hcatalog false false diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt deleted file mode 100644 index 97ab7aa..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt +++ /dev/null @@ -1,101 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterColumnBetween; -import org.apache.hadoop.hive.ql.plan.DynamicValue; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.sql.Timestamp; -import org.apache.hadoop.hive.common.type.HiveDecimal; - -public class extends FilterColumnBetween { - - private static final long serialVersionUID = 1L; - - private static final Logger LOG = LoggerFactory.getLogger(.class); - - protected DynamicValue leftDynamicValue; - protected DynamicValue rightDynamicValue; - protected transient boolean initialized = false; - protected transient boolean isLeftOrRightNull = false; - - public (int colNum, DynamicValue leftValue, DynamicValue rightValue) { - super(colNum, , ); - this.leftDynamicValue = leftValue; - this.rightDynamicValue = rightValue; - } - - public () { - } - - public DynamicValue getLeftDynamicValue() { - return leftDynamicValue; - } - - public void setLeftDynamicValue(DynamicValue leftValue) { - this.leftDynamicValue = leftValue; - } - - public DynamicValue getRightDynamicValue() { - return rightDynamicValue; - } - - public void getRightDynamicValue(DynamicValue rightValue) { - this.rightDynamicValue = rightValue; - } - - @Override - public void init(Configuration conf) { - super.init(conf); - leftDynamicValue.setConf(conf); - rightDynamicValue.setConf(conf); - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - if (!initialized) { - Object lVal = leftDynamicValue.getValue(); - Object rVal = rightDynamicValue.getValue(); - if (lVal == null || rVal == null) { - isLeftOrRightNull = true; - } else { - min = PrimitiveObjectInspectorUtils.( - lVal, leftDynamicValue.getObjectInspector()); - setLeftValue(min); - - max = PrimitiveObjectInspectorUtils.( - rVal, rightDynamicValue.getObjectInspector()); - setRightValue(max); - } - initialized = true; - } - - // Special case for dynamic values - min/max can be null - if (isLeftOrRightNull) { - // Entire batch is filtered out - batch.size = 0; - } - - super.evaluate(batch); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt index 62d2254..d68edfa 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt @@ -154,22 +154,6 @@ public class extends VectorExpression { return "boolean"; } - public HiveDecimal getLeftValue() { - return leftValue; - } - - public void setLeftValue(HiveDecimal value) { - this.leftValue = value; - } - - public HiveDecimal getRightValue() { - return rightValue; - } - - public void setRightValue(HiveDecimal value) { - this.rightValue = value; - } - @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt index 16d4aaf..e8049da 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt @@ -161,19 +161,19 @@ public 
class extends VectorExpression { this.colNum = colNum; } - public byte[] getLeftValue() { + public byte[] getLeft() { return left; } - public void setLeftValue(byte[] value) { + public void setLeft(byte[] value) { this.left = value; } - public byte[] getRightValue() { + public byte[] getRight() { return right; } - public void setRightValue(byte[] value) { + public void setRight(byte[] value) { this.right = value; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt index 806148f..4298d79 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt @@ -153,22 +153,6 @@ public class extends VectorExpression { return "boolean"; } - public Timestamp getLeftValue() { - return leftValue; - } - - public void setLeftValue(Timestamp value) { - this.leftValue = value; - } - - public Timestamp getRightValue() { - return rightValue; - } - - public void setRightValue(Timestamp value) { - this.rightValue = value; - } - @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt index d350dcb..94a174d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt @@ -163,19 +163,19 @@ public class extends VectorExpression { this.colNum = colNum; } - public byte[] getLeftValue() { + public byte[] getLeft() { return left; } - public void setLeftValue(byte[] value) { + public void setLeft(byte[] value) { this.left = value; } - public byte[] getRightValue() { + public byte[] getRight() { return right; } - public void setRightValue(byte[] value) { + public void setRight(byte[] value) { this.right = value; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index ad701f6..3b1f47b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -63,6 +63,8 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.io.HdfsUtils; +import org.apache.hadoop.hive.metastore.HiveMetaHook; +import org.apache.hadoop.hive.metastore.DefaultHiveMetaHook; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.PartitionDropOptions; import org.apache.hadoop.hive.metastore.TableType; @@ -138,6 +140,7 @@ import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; +import org.apache.hadoop.hive.ql.parse.PreInsertTableDesc; import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AbortTxnsDesc; @@ -567,6 +570,10 @@ public int execute(DriverContext driverContext) { if (insertTableDesc != null) { return insertCommitWork(db, insertTableDesc); } + PreInsertTableDesc preInsertTableDesc = work.getPreInsertTableDesc(); + if (preInsertTableDesc != null) { + return preInsertWork(db, preInsertTableDesc); + 
} } catch (Throwable e) { failed(e); return 1; @@ -575,13 +582,40 @@ public int execute(DriverContext driverContext) { return 0; } - private int insertCommitWork(Hive db, InsertTableDesc insertTableDesc) throws HiveException { - try { - db.getMSC().insertTable(insertTableDesc.getTable(), insertTableDesc.isOverwrite()); - return 0; + private int preInsertWork(Hive db, PreInsertTableDesc preInsertTableDesc) throws HiveException { + try{ + HiveMetaHook hook = preInsertTableDesc.getTable().getStorageHandler().getMetaHook(); + if (hook == null || !(hook instanceof DefaultHiveMetaHook)) { + return 0; + } + DefaultHiveMetaHook hiveMetaHook = (DefaultHiveMetaHook) hook; + hiveMetaHook.preInsertTable(preInsertTableDesc.getTable().getTTable(), preInsertTableDesc.isOverwrite()); } catch (MetaException e) { throw new HiveException(e); } + return 0; + } + + private int insertCommitWork(Hive db, InsertTableDesc insertTableDesc) throws MetaException { + boolean failed = true; + HiveMetaHook hook = insertTableDesc.getTable().getStorageHandler().getMetaHook(); + if (hook == null || !(hook instanceof DefaultHiveMetaHook)) { + return 0; + } + DefaultHiveMetaHook hiveMetaHook = (DefaultHiveMetaHook) hook; + try { + hiveMetaHook.commitInsertTable(insertTableDesc.getTable().getTTable(), + insertTableDesc.isOverwrite() + ); + failed = false; + } finally { + if (failed) { + hiveMetaHook.rollbackInsertTable(insertTableDesc.getTable().getTTable(), + insertTableDesc.isOverwrite() + ); + } + } + return 0; } private int cacheMetadata(Hive db, CacheMetadataDesc desc) throws HiveException { @@ -1188,12 +1222,10 @@ private int touch(Hive db, AlterTableSimpleDesc touchDesc) throws HiveException { Table tbl = db.getTable(touchDesc.getTableName()); - EnvironmentContext environmentContext = new EnvironmentContext(); - environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); if (touchDesc.getPartSpec() == null) { try { - db.alterTable(touchDesc.getTableName(), tbl, environmentContext); + db.alterTable(touchDesc.getTableName(), tbl, null); } catch (InvalidOperationException e) { throw new HiveException("Uable to update table"); } @@ -1205,7 +1237,7 @@ private int touch(Hive db, AlterTableSimpleDesc touchDesc) throw new HiveException("Specified partition does not exist"); } try { - db.alterPartition(touchDesc.getTableName(), part, environmentContext); + db.alterPartition(touchDesc.getTableName(), part, null); } catch (InvalidOperationException e) { throw new HiveException(e); } @@ -3512,16 +3544,6 @@ private boolean isSchemaEvolutionEnabled(Table tbl) { private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Partition part) throws HiveException { - EnvironmentContext environmentContext = alterTbl.getEnvironmentContext(); - if (environmentContext == null) { - environmentContext = new EnvironmentContext(); - alterTbl.setEnvironmentContext(environmentContext); - } - // do not need update stats in alter table/partition operations - if (environmentContext.getProperties() == null || - environmentContext.getProperties().get(StatsSetupConst.DO_NOT_UPDATE_STATS) == null) { - environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); - } if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAME) { tbl.setDbName(Utilities.getDatabaseName(alterTbl.getNewName())); @@ -3659,10 +3681,6 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part } sd.setCols(alterTbl.getNewCols()); } else if (alterTbl.getOp() == 
AlterTableDesc.AlterTableTypes.ADDPROPS) { - if (StatsSetupConst.USER.equals(environmentContext.getProperties() - .get(StatsSetupConst.STATS_GENERATED))) { - environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); - } if (part != null) { part.getTPartition().getParameters().putAll(alterTbl.getProps()); } else { @@ -3670,11 +3688,6 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.DROPPROPS) { Iterator keyItr = alterTbl.getProps().keySet().iterator(); - if (StatsSetupConst.USER.equals(environmentContext.getProperties() - .get(StatsSetupConst.STATS_GENERATED))) { - // drop a stats parameter, which triggers recompute stats update automatically - environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); - } while (keyItr.hasNext()) { if (part != null) { part.getTPartition().getParameters().remove(keyItr.next()); @@ -3768,8 +3781,6 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part } catch (URISyntaxException e) { throw new HiveException(e); } - environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); - } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSKEWEDBY) { // Validation's been done at compile time. no validation is needed here. List skewedColNames = null; @@ -3815,8 +3826,6 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part throw new HiveException(e); } } - - environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); } else if (alterTbl.getOp() == AlterTableTypes.ALTERBUCKETNUM) { if (part != null) { if (part.getBucketCount() == alterTbl.getNumberBuckets()) { @@ -4459,7 +4468,7 @@ private int createView(Hive db, CreateViewDesc crtView) throws HiveException { HiveMaterializedViewsRegistry.get().addMaterializedView(tbl); } addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK)); - + //set lineage info DataContainer dc = new DataContainer(tbl.getTTable()); SessionState.get().getLineageState().setLineage(new Path(crtView.getViewName()), dc, tbl.getCols()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 0f05160..e166eee 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -465,7 +465,6 @@ system.registerGenericUDF("printf", GenericUDFPrintf.class); system.registerGenericUDF("greatest", GenericUDFGreatest.class); system.registerGenericUDF("least", GenericUDFLeast.class); - system.registerGenericUDF("cardinality_violation", GenericUDFCardinalityViolation.class); system.registerGenericUDF("from_utc_timestamp", GenericUDFFromUtcTimestamp.class); system.registerGenericUDF("to_utc_timestamp", GenericUDFToUtcTimestamp.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java index 4686e2c..e6b943b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec; -import org.apache.hadoop.hive.metastore.ReplChangeManager; -import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.parse.EximUtil; import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import 
org.apache.hadoop.hive.ql.plan.CopyWork; @@ -128,16 +126,15 @@ protected int execute(DriverContext driverContext) { for (FileStatus oneSrc : srcFiles) { console.printInfo("Copying file: " + oneSrc.getPath().toString()); LOG.debug("Copying file: " + oneSrc.getPath().toString()); - - FileSystem actualSrcFs = null; - if (rwork.getReadListFromInput()){ - // TODO : filesystemcache prevents this from being a perf nightmare, but we - // should still probably follow up to see if we need to do something better here. - actualSrcFs = oneSrc.getPath().getFileSystem(conf); - } else { - actualSrcFs = srcFs; - } if (!rwork.getListFilesOnOutputBehaviour(oneSrc)){ + FileSystem actualSrcFs = null; + if (rwork.getReadListFromInput()){ + // TODO : filesystemcache prevents this from being a perf nightmare, but we + // should still probably follow up to see if we need to do something better here. + actualSrcFs = oneSrc.getPath().getFileSystem(conf); + } else { + actualSrcFs = srcFs; + } LOG.debug("ReplCopyTask :cp:" + oneSrc.getPath() + "=>" + toPath); if (!FileUtils.copy(actualSrcFs, oneSrc.getPath(), dstFs, toPath, @@ -151,9 +148,7 @@ protected int execute(DriverContext driverContext) { }else{ LOG.debug("ReplCopyTask _files now tracks:" + oneSrc.getPath().toUri()); console.printInfo("Tracking file: " + oneSrc.getPath().toUri()); - String chksumString = ReplChangeManager.getChksumString(oneSrc.getPath(), actualSrcFs); - listBW.write(ReplChangeManager.encodeFileUri - (oneSrc.getPath().toUri().toString(), chksumString) + "\n"); + listBW.write(oneSrc.getPath().toUri().toString() + "\n"); } } @@ -188,16 +183,12 @@ protected int execute(DriverContext driverContext) { String line = null; while ( (line = br.readLine()) != null){ LOG.debug("ReplCopyTask :_filesReadLine:" + line); - - String[] fileWithChksum = ReplChangeManager.getFileWithChksumFromURI(line); - try { - FileStatus f = ReplChangeManager.getFileStatus(new Path(fileWithChksum[0]), - fileWithChksum[1], conf); - ret.add(f); - } catch (MetaException e) { - // skip and issue warning for missing file - LOG.warn("Cannot find " + fileWithChksum[0] + " in source repo or cmroot"); - } + String fileUriStr = EximUtil.getCMDecodedFileName(line); + // TODO HIVE-15490: Add checksum validation here + Path p = new Path(fileUriStr); + // TODO: again, fs cache should make this okay, but if not, revisit + FileSystem srcFs = p.getFileSystem(conf); + ret.add(srcFs.getFileStatus(p)); // Note - we need srcFs rather than fs, because it is possible that the _files lists files // which are from a different filesystem than the fs where the _files file itself was loaded // from. 
Currently, it is possible, for eg., to do REPL LOAD hdfs:///dir/ and for the _files diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 12a03d0..68dd5e7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -176,6 +176,7 @@ import java.util.Arrays; import java.util.Calendar; import java.util.Collection; +import java.util.Collections; import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; @@ -194,6 +195,9 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.zip.Deflater; @@ -2102,7 +2106,7 @@ public static ContentSummary getInputSummary(final Context ctx, MapWork work, Pa long[] summary = {0, 0, 0}; - final Set pathNeedProcess = new HashSet<>(); + final List pathNeedProcess = new ArrayList<>(); // Since multiple threads could call this method concurrently, locking // this method will avoid number of threads out of control. @@ -2131,14 +2135,13 @@ public static ContentSummary getInputSummary(final Context ctx, MapWork work, Pa // Process the case when name node call is needed final Map resultMap = new ConcurrentHashMap(); ArrayList> results = new ArrayList>(); - final ExecutorService executor; + final ThreadPoolExecutor executor; int maxThreads = ctx.getConf().getInt("mapred.dfsclient.parallelism.max", 0); if (pathNeedProcess.size() > 1 && maxThreads > 1) { int numExecutors = Math.min(pathNeedProcess.size(), maxThreads); LOG.info("Using " + numExecutors + " threads for getContentSummary"); - executor = Executors.newFixedThreadPool(numExecutors, - new ThreadFactoryBuilder().setDaemon(true) - .setNameFormat("Get-Input-Summary-%d").build()); + executor = new ThreadPoolExecutor(numExecutors, numExecutors, 60, TimeUnit.SECONDS, + new LinkedBlockingQueue()); } else { executor = null; } @@ -2188,19 +2191,11 @@ public void run() { resultMap.put(pathStr, cs.getContentSummary(p, myJobConf)); return; } - - String metaTableStorage = null; - if (partDesc.getTableDesc() != null && - partDesc.getTableDesc().getProperties() != null) { - metaTableStorage = partDesc.getTableDesc().getProperties() - .getProperty(hive_metastoreConstants.META_TABLE_STORAGE, null); - } - if (partDesc.getProperties() != null) { - metaTableStorage = partDesc.getProperties() - .getProperty(hive_metastoreConstants.META_TABLE_STORAGE, metaTableStorage); - } - - HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf, metaTableStorage); + HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf, + SerDeUtils.createOverlayedProperties( + partDesc.getTableDesc().getProperties(), + partDesc.getProperties()) + .getProperty(hive_metastoreConstants.META_TABLE_STORAGE)); if (handler instanceof InputEstimator) { long total = 0; TableDesc tableDesc = partDesc.getTableDesc(); @@ -2212,15 +2207,14 @@ public void run() { Utilities.setColumnTypeList(jobConf, scanOp, true); PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc); Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf); - total += estimator.estimate(jobConf, scanOp, -1).getTotalLength(); + total += estimator.estimate(myJobConf, scanOp, -1).getTotalLength(); } resultMap.put(pathStr, new 
ContentSummary(total, -1, -1)); - } else { - // todo: should nullify summary for non-native tables, - // not to be selected as a mapjoin target - FileSystem fs = p.getFileSystem(myConf); - resultMap.put(pathStr, fs.getContentSummary(p)); } + // todo: should nullify summary for non-native tables, + // not to be selected as a mapjoin target + FileSystem fs = p.getFileSystem(myConf); + resultMap.put(pathStr, fs.getContentSummary(p)); } catch (Exception e) { // We safely ignore this exception for summary data. // We don't update the cache to protect it from polluting other diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicValueRegistryTez.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicValueRegistryTez.java index b7687c5..7bbedf6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicValueRegistryTez.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicValueRegistryTez.java @@ -122,15 +122,9 @@ public void init(RegistryConf conf) throws Exception { setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), val); } } - // For now, expecting a single row (min/max, aggregated bloom filter), or no rows - if (rowCount == 0) { - LOG.debug("No input rows from " + inputSourceName + ", filling dynamic values with nulls"); - for (int colIdx = 0; colIdx < colExprEvaluators.size(); ++colIdx) { - ExprNodeEvaluator eval = colExprEvaluators.get(colIdx); - setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), null); - } - } else if (rowCount > 1) { - throw new IllegalStateException("Expected 0 or 1 rows from " + inputSourceName + ", got " + rowCount); + // For now, expecting a single row (min/max, aggregated bloom filter) + if (rowCount != 1) { + throw new IllegalStateException("Expected 1 row from " + inputSourceName + ", got " + rowCount); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java index ecac85c..fae6393 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java @@ -36,7 +36,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; -import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Queue; @@ -67,12 +67,6 @@ */ public class TezSessionPoolManager { - private enum CustomQueueAllowed { - TRUE, - FALSE, - IGNORE - } - private static final Logger LOG = LoggerFactory.getLogger(TezSessionPoolManager.class); private static final Random rdm = new Random(); @@ -86,16 +80,13 @@ private Thread expirationThread; private Thread restartThread; + private Semaphore llapQueue; private HiveConf initConf = null; // Config settings. private int numConcurrentLlapQueries = -1; private long sessionLifetimeMs = 0; private long sessionLifetimeJitterMs = 0; - private CustomQueueAllowed customQueueAllowed = CustomQueueAllowed.TRUE; - private List restrictedHiveConf = new ArrayList<>(); - private List restrictedNonHiveConf = new ArrayList<>(); - /** A queue for initial sessions that have not been started yet. 
*/ private Queue initialSessions = new ConcurrentLinkedQueue(); @@ -208,31 +199,6 @@ public void setupPool(HiveConf conf) throws InterruptedException { llapQueue = new Semaphore(numConcurrentLlapQueries, true); this.initConf = conf; - String queueAllowedStr = HiveConf.getVar(initConf, - ConfVars.HIVE_SERVER2_TEZ_SESSION_CUSTOM_QUEUE_ALLOWED); - try { - this.customQueueAllowed = CustomQueueAllowed.valueOf(queueAllowedStr.toUpperCase()); - } catch (Exception ex) { - throw new RuntimeException("Invalid value '" + queueAllowedStr + "' for " + - ConfVars.HIVE_SERVER2_TEZ_SESSION_CUSTOM_QUEUE_ALLOWED.varname); - } - String[] restrictedConfigs = HiveConf.getTrimmedStringsVar(initConf, - ConfVars.HIVE_SERVER2_TEZ_SESSION_RESTRICTED_CONFIGS); - if (restrictedConfigs != null && restrictedConfigs.length > 0) { - HashMap confVars = HiveConf.getOrCreateReverseMap(); - for (String confName : restrictedConfigs) { - if (confName == null || confName.isEmpty()) continue; - confName = confName.toLowerCase(); - ConfVars cv = confVars.get(confName); - if (cv != null) { - restrictedHiveConf.add(cv); - } else { - LOG.warn("A restricted config " + confName + " is not recognized as a Hive setting."); - restrictedNonHiveConf.add(confName); - } - } - } - sessionLifetimeMs = conf.getTimeVar( ConfVars.HIVE_SERVER2_TEZ_SESSION_LIFETIME, TimeUnit.MILLISECONDS); @@ -303,33 +269,10 @@ private TezSessionPoolSession createAndInitSession(String queue, boolean isDefau return sessionState; } - private TezSessionState getSession(HiveConf conf, boolean doOpen) + private TezSessionState getSession(HiveConf conf, boolean doOpen, + boolean forceCreate) throws Exception { String queueName = conf.get("tez.queue.name"); - boolean hasQueue = (queueName != null) && !queueName.isEmpty(); - if (hasQueue) { - switch (customQueueAllowed) { - case FALSE: throw new HiveException("Specifying tez.queue.name is not allowed"); - case IGNORE: { - LOG.warn("User has specified " + queueName + " queue; ignoring the setting"); - queueName = null; - hasQueue = false; - conf.set("tez.queue.name", null); - } - default: // All good. - } - } - for (ConfVars var : restrictedHiveConf) { - String userValue = HiveConf.getVarWithoutType(conf, var), - serverValue = HiveConf.getVarWithoutType(initConf, var); - // Note: with some trickery, we could add logic for each type in ConfVars; for now the - // potential spurious mismatches (e.g. 0 and 0.0 for float) should be easy to work around. - validateRestrictedConfigValues(var.varname, userValue, serverValue); - } - for (String var : restrictedNonHiveConf) { - String userValue = conf.get(var), serverValue = initConf.get(var); - validateRestrictedConfigValues(var, userValue, serverValue); - } // TODO Session re-use completely disabled for doAs=true. Always launches a new session. boolean nonDefaultUser = conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS); @@ -340,9 +283,11 @@ private TezSessionState getSession(HiveConf conf, boolean doOpen) * their own credentials. We expect that with the new security model, things will * run as user hive in most cases. 
*/ - if (nonDefaultUser || !hasInitialSessions || hasQueue) { - LOG.info("QueueName: {} nonDefaultUser: {} defaultQueuePool: {} hasInitialSessions: {}", - queueName, nonDefaultUser, defaultQueuePool, hasInitialSessions); + if (forceCreate || nonDefaultUser || !hasInitialSessions + || ((queueName != null) && !queueName.isEmpty())) { + LOG.info("QueueName: {} nonDefaultUser: {} defaultQueuePool: {} hasInitialSessions: {}" + + " forceCreate: {}", queueName, nonDefaultUser, defaultQueuePool, hasInitialSessions, + forceCreate); return getNewSessionState(conf, queueName, doOpen); } @@ -354,16 +299,6 @@ private TezSessionState getSession(HiveConf conf, boolean doOpen) } } - private void validateRestrictedConfigValues( - String var, String userValue, String serverValue) throws HiveException { - if ((userValue == null) != (serverValue == null) - || (userValue != null && !userValue.equals(serverValue))) { - String logValue = initConf.isHiddenConfig(var) ? "(hidden)" : serverValue; - throw new HiveException(var + " is restricted from being set; server is configured" - + " to use " + logValue + ", but the query configuration specifies " + userValue); - } - } - /** * @param conf HiveConf that is used to initialize the session * @param queueName could be null. Set in the tez session. @@ -387,39 +322,23 @@ private TezSessionState getNewSessionState(HiveConf conf, public void returnSession(TezSessionState tezSessionState, boolean llap) throws Exception { - // Ignore the interrupt status while returning the session, but set it back - // on the thread in case anything else needs to deal with it. - boolean isInterrupted = Thread.interrupted(); - - try { - if (isInterrupted) { - LOG.info("returnSession invoked with interrupt status set"); - } - if (llap && (this.numConcurrentLlapQueries > 0)) { - llapQueue.release(); - } - if (tezSessionState.isDefault() && - tezSessionState instanceof TezSessionPoolSession) { - LOG.info("The session " + tezSessionState.getSessionId() - + " belongs to the pool. Put it back in"); - SessionState sessionState = SessionState.get(); - if (sessionState != null) { - sessionState.setTezSession(null); - } - TezSessionPoolSession poolSession = - (TezSessionPoolSession) tezSessionState; - if (poolSession.returnAfterUse()) { - defaultQueuePool.put(poolSession); - } + if (llap && (this.numConcurrentLlapQueries > 0)) { + llapQueue.release(); + } + if (tezSessionState.isDefault() && tezSessionState instanceof TezSessionPoolSession) { + LOG.info("The session " + tezSessionState.getSessionId() + + " belongs to the pool. Put it back in"); + SessionState sessionState = SessionState.get(); + if (sessionState != null) { + sessionState.setTezSession(null); } - // non default session nothing changes. The user can continue to use the existing - // session in the SessionState - } finally { - // Reset the interrupt status. - if (isInterrupted) { - Thread.currentThread().interrupt(); + TezSessionPoolSession poolSession = (TezSessionPoolSession)tezSessionState; + if (poolSession.returnAfterUse()) { + defaultQueuePool.put(poolSession); } } + // non default session nothing changes. 
The user can continue to use the existing + // session in the SessionState } public static void closeIfNotDefault( @@ -472,6 +391,11 @@ protected TezSessionPoolSession createSession(String sessionId) { return new TezSessionPoolSession(sessionId, this); } + public TezSessionState getSession(TezSessionState session, HiveConf conf, boolean doOpen, + boolean llap) throws Exception { + return getSession(session, conf, doOpen, false, llap); + } + /* * This method helps to re-use a session in case there has been no change in * the configuration of a session. This will happen only in the case of non-hive-server2 @@ -518,8 +442,8 @@ private static boolean canWorkWithSameSession(TezSessionState session, HiveConf } } - public TezSessionState getSession( - TezSessionState session, HiveConf conf, boolean doOpen, boolean llap) throws Exception { + public TezSessionState getSession(TezSessionState session, HiveConf conf, boolean doOpen, + boolean forceCreate, boolean llap) throws Exception { if (llap && (this.numConcurrentLlapQueries > 0)) { llapQueue.acquire(); // blocks if no more llap queries can be submitted. } @@ -532,7 +456,7 @@ public TezSessionState getSession( closeIfNotDefault(session, false); } - return getSession(conf, doOpen); + return getSession(conf, doOpen, forceCreate); } /** Reopens the session that was found to not be running. */ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java index 7479b85..0efca68 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java @@ -190,18 +190,12 @@ public int execute(DriverContext driverContext) { counters = dagClient.getDAGStatus(statusGetOpts).getDAGCounters(); } catch (Exception err) { // Don't fail execution due to counters - just don't print summary info - LOG.warn("Failed to get counters. Ignoring, summary info will be incomplete. " + err, err); + LOG.error("Failed to get counters: " + err, err); counters = null; } } finally { // We return this to the pool even if it's unusable; reopen is supposed to handle this. 
-      try {
-        TezSessionPoolManager.getInstance()
-            .returnSession(session, getWork().getLlapMode());
-      } catch (Exception e) {
-        LOG.error("Failed to return session: {} to pool", session, e);
-        throw e;
-      }
+      TezSessionPoolManager.getInstance().returnSession(session, getWork().getLlapMode());
     }
     if (LOG.isInfoEnabled() && counters != null
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
index f4499d7..217af3f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
@@ -183,8 +183,7 @@ public static String getVectorColumnSimpleName(String hiveTypeName) {
   public enum InputExpressionType {
     NONE(0),
     COLUMN(1),
-    SCALAR(2),
-    DYNAMICVALUE(3);
+    SCALAR(2);
     private final int value;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
index 2598445..261246b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
@@ -72,8 +72,6 @@ protected void initializeOp(Configuration hconf) throws HiveException {
     try {
       heartbeatInterval = HiveConf.getIntVar(hconf,
           HiveConf.ConfVars.HIVESENDHEARTBEAT);
-
-      conditionEvaluator.init(hconf);
     } catch (Throwable e) {
       throw new HiveException(e);
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
index bb382b1..f7fec8f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
@@ -113,7 +113,6 @@ protected void initializeOp(Configuration hconf) throws HiveException {
     projectedColumns = new int [vExpressions.length];
     for (int i = 0; i < projectedColumns.length; i++) {
-      vExpressions[i].init(hconf);
       projectedColumns[i] = vExpressions[i].getOutputColumn();
     }
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
index 319b4a8..a95098a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
@@ -20,7 +20,6 @@
 import java.io.IOException;
 import java.sql.Timestamp;
-import java.util.Arrays;
 import java.util.List;
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
@@ -50,11 +49,6 @@
   private T serializeWrite;
-  private Category[] categories;
-  private PrimitiveCategory[] primitiveCategories;
-
-  private int[] outputColumnNums;
-
   public VectorSerializeRow(T serializeWrite) {
     this();
     this.serializeWrite = serializeWrite;
@@ -64,164 +58,598 @@ public VectorSerializeRow(T serializeWrite) {
   private VectorSerializeRow() {
   }
-  public void init(List typeNames, int[] columnMap) throws HiveException {
+  private abstract class Writer {
+    protected int columnIndex;
+
+    Writer(int columnIndex) {
+      this.columnIndex = columnIndex;
+    }
+
+    abstract boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException;
+  }
+
+  private abstract class AbstractLongWriter extends Writer {
+
+    AbstractLongWriter(int columnIndex) {
+      super(columnIndex);
+    }
+  }
+
+  private class BooleanWriter extends
AbstractLongWriter { + + BooleanWriter(int columnIndex) { + super(columnIndex); + } - final int size = typeNames.size(); - categories = new Category[size]; - primitiveCategories = new PrimitiveCategory[size]; - outputColumnNums = Arrays.copyOf(columnMap, size); - TypeInfo typeInfo; - for (int i = 0; i < size; i++) { - typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i)); - categories[i] = typeInfo.getCategory(); - if (categories[i] == Category.PRIMITIVE) { - primitiveCategories[i] = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeBoolean(colVector.vector[0] != 0); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeBoolean(colVector.vector[batchIndex] != 0); + return true; + } else { + serializeWrite.writeNull(); + return false; + } } } } - public void init(List typeNames) throws HiveException { + private class ByteWriter extends AbstractLongWriter { + + ByteWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; - final int size = typeNames.size(); - categories = new Category[size]; - primitiveCategories = new PrimitiveCategory[size]; - outputColumnNums = new int[size]; - TypeInfo typeInfo; - for (int i = 0; i < size; i++) { - typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i)); - categories[i] = typeInfo.getCategory(); - if (categories[i] == Category.PRIMITIVE) { - primitiveCategories[i] = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeByte((byte) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeByte((byte) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } } - outputColumnNums[i] = i; } } - public void init(TypeInfo[] typeInfos, int[] columnMap) - throws HiveException { + private class ShortWriter extends AbstractLongWriter { - final int size = typeInfos.length; - categories = new Category[size]; - primitiveCategories = new PrimitiveCategory[size]; - outputColumnNums = Arrays.copyOf(columnMap, size); - TypeInfo typeInfo; - for (int i = 0; i < typeInfos.length; i++) { - typeInfo = typeInfos[i]; - categories[i] = typeInfo.getCategory(); - if (categories[i] == Category.PRIMITIVE) { - primitiveCategories[i] = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + ShortWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeShort((short) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeShort((short) 
colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } } } } - public int getCount() { - return categories.length; + private class IntWriter extends AbstractLongWriter { + + IntWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeInt((int) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeInt((int) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } } - public void setOutput(Output output) { - serializeWrite.set(output); + private class LongWriter extends AbstractLongWriter { + + LongWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeLong(colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeLong(colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } } - public void setOutputAppend(Output output) { - serializeWrite.setAppend(output); + private class DateWriter extends AbstractLongWriter { + + DateWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeDate((int) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeDate((int) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } } - private boolean hasAnyNulls; - private boolean isAllNulls; + private class TimestampWriter extends Writer { - /* - * Note that when serializing a row, the logical mapping using selected in use has already - * been performed. batchIndex is the actual index of the row. 
- */ - public void serializeWrite(VectorizedRowBatch batch, int batchIndex) throws IOException { + Timestamp scratchTimestamp; + + TimestampWriter(int columnIndex) { + super(columnIndex); + scratchTimestamp = new Timestamp(0); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex]; - hasAnyNulls = false; - isAllNulls = true; - ColumnVector colVector; - int adjustedBatchIndex; - final int size = categories.length; - for (int i = 0; i < size; i++) { - colVector = batch.cols[outputColumnNums[i]]; if (colVector.isRepeating) { - adjustedBatchIndex = 0; + if (colVector.noNulls || !colVector.isNull[0]) { + colVector.timestampUpdate(scratchTimestamp, 0); + serializeWrite.writeTimestamp(scratchTimestamp); + return true; + } else { + serializeWrite.writeNull(); + return false; + } } else { - adjustedBatchIndex = batchIndex; + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + colVector.timestampUpdate(scratchTimestamp, batchIndex); + serializeWrite.writeTimestamp(scratchTimestamp); + return true; + } else { + serializeWrite.writeNull(); + return false; + } } - if (!colVector.noNulls && colVector.isNull[adjustedBatchIndex]) { - serializeWrite.writeNull(); - hasAnyNulls = true; - continue; + } + } + + private class IntervalYearMonthWriter extends AbstractLongWriter { + + IntervalYearMonthWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeHiveIntervalYearMonth((int) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeHiveIntervalYearMonth((int) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class IntervalDayTimeWriter extends Writer { + + private HiveIntervalDayTime hiveIntervalDayTime; + + IntervalDayTimeWriter(int columnIndex) { + super(columnIndex); + hiveIntervalDayTime = new HiveIntervalDayTime(); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + IntervalDayTimeColumnVector colVector = (IntervalDayTimeColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + hiveIntervalDayTime.set(colVector.asScratchIntervalDayTime(0)); + serializeWrite.writeHiveIntervalDayTime(hiveIntervalDayTime); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + hiveIntervalDayTime.set(colVector.asScratchIntervalDayTime(batchIndex)); + serializeWrite.writeHiveIntervalDayTime(hiveIntervalDayTime); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private abstract class AbstractDoubleWriter extends Writer { + + AbstractDoubleWriter(int columnIndex) { + super(columnIndex); + } + } + + private class FloatWriter extends AbstractDoubleWriter { + + FloatWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) 
batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeFloat((float) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeFloat((float) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class DoubleWriter extends AbstractDoubleWriter { + + DoubleWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeDouble(colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeDouble(colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class StringWriter extends Writer { + + StringWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeString(colVector.vector[0], colVector.start[0], colVector.length[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeString(colVector.vector[batchIndex], + colVector.start[batchIndex], colVector.length[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class BinaryWriter extends Writer { + + BinaryWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeBinary(colVector.vector[0], colVector.start[0], colVector.length[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeBinary(colVector.vector[batchIndex], + colVector.start[batchIndex], colVector.length[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class HiveDecimalWriter extends Writer { + protected HiveDecimalWritable[] vector; + + HiveDecimalWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DecimalColumnVector colVector = (DecimalColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + // We serialize specifying the HiveDecimalWritable but also the desired + // serialization scale that will be used by text serialization for adding + // trailing fractional zeroes. 
+ serializeWrite.writeHiveDecimal(colVector.vector[0], colVector.scale); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeHiveDecimal(colVector.vector[batchIndex], colVector.scale); + return true; + } else { + serializeWrite.writeNull(); + return false; + } } - isAllNulls = false; - switch (categories[i]) { - case PRIMITIVE: - switch (primitiveCategories[i]) { + } + } + + private Writer[] writers; + + private Writer createWriter(TypeInfo typeInfo, int columnIndex) throws HiveException { + Writer writer; + Category category = typeInfo.getCategory(); + switch (category) { + case PRIMITIVE: + { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + switch (primitiveCategory) { + // case VOID: + // UNDONE: + // break; case BOOLEAN: - serializeWrite.writeBoolean(((LongColumnVector) colVector).vector[adjustedBatchIndex] != 0); + writer = new BooleanWriter(columnIndex); break; case BYTE: - serializeWrite.writeByte((byte) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); + writer = new ByteWriter(columnIndex); break; case SHORT: - serializeWrite.writeShort((short) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); + writer = new ShortWriter(columnIndex); break; case INT: - serializeWrite.writeInt((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); + writer = new IntWriter(columnIndex); break; case LONG: - serializeWrite.writeLong(((LongColumnVector) colVector).vector[adjustedBatchIndex]); + writer = new LongWriter(columnIndex); break; case DATE: - serializeWrite.writeDate((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); + writer = new DateWriter(columnIndex); break; case TIMESTAMP: - serializeWrite.writeTimestamp(((TimestampColumnVector) colVector).asScratchTimestamp(adjustedBatchIndex)); + writer = new TimestampWriter(columnIndex); break; case FLOAT: - serializeWrite.writeFloat((float) ((DoubleColumnVector) colVector).vector[adjustedBatchIndex]); + writer = new FloatWriter(columnIndex); break; case DOUBLE: - serializeWrite.writeDouble(((DoubleColumnVector) colVector).vector[adjustedBatchIndex]); + writer = new DoubleWriter(columnIndex); break; case STRING: case CHAR: case VARCHAR: - { - // We store CHAR and VARCHAR without pads, so write with STRING. - BytesColumnVector bytesColVector = (BytesColumnVector) colVector; - serializeWrite.writeString( - bytesColVector.vector[adjustedBatchIndex], - bytesColVector.start[adjustedBatchIndex], - bytesColVector.length[adjustedBatchIndex]); - } + // We store CHAR and VARCHAR without pads, so use STRING writer class. 
+ writer = new StringWriter(columnIndex); break; case BINARY: - { - BytesColumnVector bytesColVector = (BytesColumnVector) colVector; - serializeWrite.writeBinary( - bytesColVector.vector[adjustedBatchIndex], - bytesColVector.start[adjustedBatchIndex], - bytesColVector.length[adjustedBatchIndex]); - } + writer = new BinaryWriter(columnIndex); break; case DECIMAL: - { - DecimalColumnVector decimalColVector = (DecimalColumnVector) colVector; - serializeWrite.writeHiveDecimal(decimalColVector.vector[adjustedBatchIndex], decimalColVector.scale); - } + writer = new HiveDecimalWriter(columnIndex); break; case INTERVAL_YEAR_MONTH: - serializeWrite.writeHiveIntervalYearMonth((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]); + writer = new IntervalYearMonthWriter(columnIndex); break; case INTERVAL_DAY_TIME: - serializeWrite.writeHiveIntervalDayTime(((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedBatchIndex)); + writer = new IntervalDayTimeWriter(columnIndex); break; default: - throw new RuntimeException("Unexpected primitive category " + primitiveCategories[i]); + throw new HiveException("Unexpected primitive type category " + primitiveCategory); } - break; - default: - throw new RuntimeException("Unexpected category " + categories[i]); + } + break; + default: + throw new HiveException("Unexpected type category " + category); + } + return writer; + } + + public void init(List typeNames, int[] columnMap) throws HiveException { + + writers = new Writer[typeNames.size()]; + for (int i = 0; i < typeNames.size(); i++) { + String typeName = typeNames.get(i); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + int columnIndex = columnMap[i]; + Writer writer = createWriter(typeInfo, columnIndex); + writers[i] = writer; + } + } + + public void init(List typeNames) throws HiveException { + + writers = new Writer[typeNames.size()]; + for (int i = 0; i < typeNames.size(); i++) { + String typeName = typeNames.get(i); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + Writer writer = createWriter(typeInfo, i); + writers[i] = writer; + } + } + + public void init(TypeInfo[] typeInfos, int[] columnMap) + throws HiveException { + + writers = new Writer[typeInfos.length]; + for (int i = 0; i < typeInfos.length; i++) { + int columnIndex = columnMap[i]; + Writer writer = createWriter(typeInfos[i], columnIndex); + writers[i] = writer; + } + } + + public int getCount() { + return writers.length; + } + + public void setOutput(Output output) { + serializeWrite.set(output); + } + + public void setOutputAppend(Output output) { + serializeWrite.setAppend(output); + } + + private boolean hasAnyNulls; + private boolean isAllNulls; + + /* + * Note that when serializing a row, the logical mapping using selected in use has already + * been performed. batchIndex is the actual index of the row. 
+ */ + public void serializeWrite(VectorizedRowBatch batch, int batchIndex) throws IOException { + + hasAnyNulls = false; + isAllNulls = true; + for (Writer writer : writers) { + if (!writer.apply(batch, batchIndex)) { + hasAnyNulls = true; + } else { + isAllNulls = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 484f615..c887757 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -54,8 +54,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgTimestamp; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFBloomFilter; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFBloomFilterMerge; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountMerge; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar; @@ -99,7 +97,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.udf.SettableUDF; @@ -588,8 +585,6 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress } else if (exprDesc instanceof ExprNodeConstantDesc) { ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(), mode); - } else if (exprDesc instanceof ExprNodeDynamicValueDesc) { - ve = getDynamicValueVectorExpression((ExprNodeDynamicValueDesc) exprDesc, mode); } if (ve == null) { throw new HiveException( @@ -1099,21 +1094,6 @@ private VectorExpression getConstantVectorExpression(Object constantValue, TypeI } } - private VectorExpression getDynamicValueVectorExpression(ExprNodeDynamicValueDesc dynamicValueExpr, - VectorExpressionDescriptor.Mode mode) throws HiveException { - String typeName = dynamicValueExpr.getTypeInfo().getTypeName(); - VectorExpressionDescriptor.ArgumentType vectorArgType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(typeName); - if (vectorArgType == VectorExpressionDescriptor.ArgumentType.NONE) { - throw new HiveException("No vector argument type for type name " + typeName); - } - int outCol = -1; - if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { - outCol = ocm.allocateOutputColumn(dynamicValueExpr.getTypeInfo()); - } - - return new DynamicValueVectorExpression(outCol, dynamicValueExpr.getTypeInfo(), dynamicValueExpr.getDynamicValue()); - } - /** * Used as a fast path for operations that don't modify their input, like unary + * and casting boolean to long. 
IdentityExpression and its children are always @@ -1201,8 +1181,6 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericeUdf, builder.setInputExpressionType(i, InputExpressionType.COLUMN); } else if (child instanceof ExprNodeConstantDesc) { builder.setInputExpressionType(i, InputExpressionType.SCALAR); - } else if (child instanceof ExprNodeDynamicValueDesc) { - builder.setInputExpressionType(i, InputExpressionType.DYNAMICVALUE); } else { throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); } @@ -1247,8 +1225,6 @@ private VectorExpression createVectorExpression(Class vectorClass, } else if (child instanceof ExprNodeConstantDesc) { Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child); arguments[i] = (null == scalarValue) ? getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue; - } else if (child instanceof ExprNodeDynamicValueDesc) { - arguments[i] = ((ExprNodeDynamicValueDesc) child).getDynamicValue(); } else { throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); } @@ -2116,13 +2092,8 @@ private VectorExpression getBetweenFilterExpression(List childExpr return null; } - boolean hasDynamicValues = false; - // We don't currently support the BETWEEN ends being columns. They must be scalars. - if ((childExpr.get(2) instanceof ExprNodeDynamicValueDesc) && - (childExpr.get(3) instanceof ExprNodeDynamicValueDesc)) { - hasDynamicValues = true; - } else if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) || + if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) || !(childExpr.get(3) instanceof ExprNodeConstantDesc)) { return null; } @@ -2167,51 +2138,35 @@ private VectorExpression getBetweenFilterExpression(List childExpr // determine class Class cl = null; if (isIntFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterLongColumnBetweenDynamicValue.class : - FilterLongColumnBetween.class); + cl = FilterLongColumnBetween.class; } else if (isIntFamily(colType) && notKeywordPresent) { cl = FilterLongColumnNotBetween.class; } else if (isFloatFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterDoubleColumnBetweenDynamicValue.class : - FilterDoubleColumnBetween.class); + cl = FilterDoubleColumnBetween.class; } else if (isFloatFamily(colType) && notKeywordPresent) { cl = FilterDoubleColumnNotBetween.class; } else if (colType.equals("string") && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterStringColumnBetweenDynamicValue.class : - FilterStringColumnBetween.class); + cl = FilterStringColumnBetween.class; } else if (colType.equals("string") && notKeywordPresent) { cl = FilterStringColumnNotBetween.class; } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterVarCharColumnBetweenDynamicValue.class : - FilterVarCharColumnBetween.class); + cl = FilterVarCharColumnBetween.class; } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) { cl = FilterVarCharColumnNotBetween.class; } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterCharColumnBetweenDynamicValue.class : - FilterCharColumnBetween.class); + cl = FilterCharColumnBetween.class; } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) { cl = FilterCharColumnNotBetween.class; } else if (colType.equals("timestamp") && !notKeywordPresent) { - cl = (hasDynamicValues ? 
- FilterTimestampColumnBetweenDynamicValue.class : - FilterTimestampColumnBetween.class); + cl = FilterTimestampColumnBetween.class; } else if (colType.equals("timestamp") && notKeywordPresent) { cl = FilterTimestampColumnNotBetween.class; } else if (isDecimalFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterDecimalColumnBetweenDynamicValue.class : - FilterDecimalColumnBetween.class); + cl = FilterDecimalColumnBetween.class; } else if (isDecimalFamily(colType) && notKeywordPresent) { cl = FilterDecimalColumnNotBetween.class; } else if (isDateFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterLongColumnBetweenDynamicValue.class : - FilterLongColumnBetween.class); + cl = FilterLongColumnBetween.class; } else if (isDateFamily(colType) && notKeywordPresent) { cl = FilterLongColumnNotBetween.class; } @@ -2269,12 +2224,6 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Ve } else if (child instanceof ExprNodeConstantDesc) { // this is a constant (or null) argDescs[i].setConstant((ExprNodeConstantDesc) child); - } else if (child instanceof ExprNodeDynamicValueDesc) { - VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION); - vectorExprs.add(e); - variableArgPositions.add(i); - exprResultColumnNums.add(e.getOutputColumn()); - argDescs[i].setVariable(e.getOutputColumn()); } else { throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : " + child); @@ -2702,14 +2651,6 @@ public static String mapTypeNameSynonyms(String typeName) { add(new AggregateDefinition("stddev_samp", ArgumentType.FLOAT_FAMILY, Mode.PARTIAL1, VectorUDAFStdSampDouble.class)); add(new AggregateDefinition("stddev_samp", ArgumentType.DECIMAL, Mode.PARTIAL1, VectorUDAFStdSampDecimal.class)); add(new AggregateDefinition("stddev_samp", ArgumentType.TIMESTAMP, Mode.PARTIAL1, VectorUDAFStdSampTimestamp.class)); - - // UDAFBloomFilter. Original data is one type, partial/final is another, - // so this requires 2 aggregation classes (partial1/complete), (partial2/final) - add(new AggregateDefinition("bloom_filter", ArgumentType.ALL_FAMILY, Mode.PARTIAL1, VectorUDAFBloomFilter.class)); - add(new AggregateDefinition("bloom_filter", ArgumentType.ALL_FAMILY, Mode.COMPLETE, VectorUDAFBloomFilter.class)); - add(new AggregateDefinition("bloom_filter", ArgumentType.BINARY, Mode.PARTIAL2, VectorUDAFBloomFilterMerge.class)); - add(new AggregateDefinition("bloom_filter", ArgumentType.BINARY, Mode.FINAL, VectorUDAFBloomFilterMerge.class)); - }}; public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java deleted file mode 100644 index 1a34118..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java +++ /dev/null @@ -1,314 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.sql.Timestamp; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.ql.exec.vector.*; -import org.apache.hadoop.hive.ql.plan.DynamicValue; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Constant is represented as a vector with repeating values. - */ -public class DynamicValueVectorExpression extends VectorExpression { - private static final Logger LOG = LoggerFactory.getLogger(DynamicValueVectorExpression.class); - - private static final long serialVersionUID = 1L; - - DynamicValue dynamicValue; - TypeInfo typeInfo; - transient private boolean initialized = false; - - private int outputColumn; - protected long longValue = 0; - private double doubleValue = 0; - private byte[] bytesValue = null; - private HiveDecimal decimalValue = null; - private Timestamp timestampValue = null; - private HiveIntervalDayTime intervalDayTimeValue = null; - private boolean isNullValue = false; - - private ColumnVector.Type type; - private int bytesValueLength = 0; - - public DynamicValueVectorExpression() { - super(); - } - - public DynamicValueVectorExpression(int outputColumn, TypeInfo typeInfo, DynamicValue dynamicValue) { - this(); - this.outputColumn = outputColumn; - this.type = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); - this.dynamicValue = dynamicValue; - this.typeInfo = typeInfo; - } - - private void evaluateLong(VectorizedRowBatch vrg) { - LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumn]; - cv.isRepeating = true; - cv.noNulls = !isNullValue; - if (!isNullValue) { - cv.vector[0] = longValue; - cv.isNull[0] = false; - } else { - cv.isNull[0] = true; - } - } - - private void evaluateDouble(VectorizedRowBatch vrg) { - DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumn]; - cv.isRepeating = true; - cv.noNulls = !isNullValue; - if (!isNullValue) { - cv.vector[0] = doubleValue; - cv.isNull[0] = false; - } else { - cv.isNull[0] = true; - } - } - - private void evaluateBytes(VectorizedRowBatch vrg) { - BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumn]; - cv.isRepeating = true; - cv.noNulls = !isNullValue; - cv.initBuffer(); - if (!isNullValue) { - cv.setVal(0, bytesValue, 0, bytesValueLength); - cv.isNull[0] = false; - } else { - cv.isNull[0] = true; - } - } - - private void evaluateDecimal(VectorizedRowBatch vrg) { - DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumn]; - dcv.isRepeating = true; - dcv.noNulls = !isNullValue; - if (!isNullValue) { - dcv.vector[0].set(decimalValue); - dcv.isNull[0] = false; - } else { - dcv.isNull[0] = true; - } - } - - private void 
evaluateTimestamp(VectorizedRowBatch vrg) { - TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumn]; - dcv.isRepeating = true; - dcv.noNulls = !isNullValue; - if (!isNullValue) { - dcv.set(0, timestampValue); - dcv.isNull[0] = false; - } else { - dcv.isNull[0] = true; - } - } - - private void evaluateIntervalDayTime(VectorizedRowBatch vrg) { - IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumn]; - dcv.isRepeating = true; - dcv.noNulls = !isNullValue; - if (!isNullValue) { - dcv.set(0, intervalDayTimeValue); - dcv.isNull[0] = false; - } else { - dcv.isNull[0] = true; - } - } - - private void initValue() { - Object val = dynamicValue.getValue(); - - if (val == null) { - isNullValue = true; - } else { - PrimitiveObjectInspector poi = dynamicValue.getObjectInspector(); - byte[] bytesVal; - switch (poi.getPrimitiveCategory()) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - longValue = PrimitiveObjectInspectorUtils.getLong(val, poi); - break; - case FLOAT: - case DOUBLE: - doubleValue = PrimitiveObjectInspectorUtils.getDouble(val, poi); - break; - case STRING: - case CHAR: - case VARCHAR: - bytesVal = PrimitiveObjectInspectorUtils.getString(val, poi).getBytes(); - setBytesValue(bytesVal); - break; - case BINARY: - bytesVal = PrimitiveObjectInspectorUtils.getBinary(val, poi).copyBytes(); - setBytesValue(bytesVal); - break; - case DECIMAL: - decimalValue = PrimitiveObjectInspectorUtils.getHiveDecimal(val, poi); - break; - case DATE: - longValue = DateWritable.dateToDays(PrimitiveObjectInspectorUtils.getDate(val, poi)); - case TIMESTAMP: - timestampValue = PrimitiveObjectInspectorUtils.getTimestamp(val, poi); - break; - case INTERVAL_YEAR_MONTH: - longValue = PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth(val, poi).getTotalMonths(); - break; - case INTERVAL_DAY_TIME: - intervalDayTimeValue = PrimitiveObjectInspectorUtils.getHiveIntervalDayTime(val, poi); - break; - default: - throw new IllegalStateException("Unsupported type " + poi.getPrimitiveCategory()); - } - } - - initialized = true; - } - - @Override - public void init(Configuration conf) { - super.init(conf); - dynamicValue.setConf(conf); - } - - @Override - public void evaluate(VectorizedRowBatch vrg) { - if (!initialized) { - initValue(); - } - - switch (type) { - case LONG: - evaluateLong(vrg); - break; - case DOUBLE: - evaluateDouble(vrg); - break; - case BYTES: - evaluateBytes(vrg); - break; - case DECIMAL: - evaluateDecimal(vrg); - break; - case TIMESTAMP: - evaluateTimestamp(vrg); - break; - case INTERVAL_DAY_TIME: - evaluateIntervalDayTime(vrg); - break; - default: - throw new IllegalStateException("Unsupported type " + type); - } - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - public long getLongValue() { - return longValue; - } - - public void setLongValue(long longValue) { - this.longValue = longValue; - } - - public double getDoubleValue() { - return doubleValue; - } - - public void setDoubleValue(double doubleValue) { - this.doubleValue = doubleValue; - } - - public byte[] getBytesValue() { - return bytesValue; - } - - public void setBytesValue(byte[] bytesValue) { - this.bytesValue = bytesValue.clone(); - this.bytesValueLength = bytesValue.length; - } - - public void setDecimalValue(HiveDecimal decimalValue) { - this.decimalValue = decimalValue; - } - - public HiveDecimal getDecimalValue() { - return decimalValue; - } - - public void setTimestampValue(Timestamp timestampValue) { - this.timestampValue = 
timestampValue; - } - - public Timestamp getTimestampValue() { - return timestampValue; - } - - public void setIntervalDayTimeValue(HiveIntervalDayTime intervalDayTimeValue) { - this.intervalDayTimeValue = intervalDayTimeValue; - } - - public HiveIntervalDayTime getIntervalDayTimeValue() { - return intervalDayTimeValue; - } - - public String getTypeString() { - return getOutputType(); - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()).build(); - } - - public DynamicValue getDynamicValue() { - return dynamicValue; - } - - public void setDynamicValue(DynamicValue dynamicValue) { - this.dynamicValue = dynamicValue; - } - - public TypeInfo getTypeInfo() { - return typeInfo; - } - - public void setTypeInfo(TypeInfo typeInfo) { - this.typeInfo = typeInfo; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index 218f306..8fca8a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -22,8 +22,6 @@ import java.util.Map; import com.google.common.collect.ImmutableMap; - -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -33,7 +31,7 @@ public abstract class VectorExpression implements Serializable { public enum Type { STRING, CHAR, VARCHAR, TIMESTAMP, DATE, LONG, DOUBLE, DECIMAL, - INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME, BINARY, OTHER; + INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME, OTHER; private static Map types = ImmutableMap.builder() .put("string", STRING) .put("char", CHAR) @@ -45,7 +43,6 @@ .put("decimal", DECIMAL) .put("interval_year_month", INTERVAL_YEAR_MONTH) .put("interval_day_time", INTERVAL_DAY_TIME) - .put("binary", BINARY) .build(); public static Type getValue(String name) { @@ -79,14 +76,6 @@ public static Type getValue(String name) { */ public abstract void evaluate(VectorizedRowBatch batch); - public void init(Configuration conf) { - if (childExpressions != null) { - for (VectorExpression child : childExpressions) { - child.init(conf); - } - } - } - /** * Returns the index of the output column in the array * of column vectors. If not applicable, -1 is returned. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorInBloomFilterColDynamicValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorInBloomFilterColDynamicValue.java deleted file mode 100644 index 188a87e..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorInBloomFilterColDynamicValue.java +++ /dev/null @@ -1,285 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.io.ByteArrayInputStream; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.plan.DynamicValue; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; -import org.apache.hive.common.util.BloomFilter; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class VectorInBloomFilterColDynamicValue extends VectorExpression { - private static final long serialVersionUID = 1L; - - private static final Logger LOG = LoggerFactory.getLogger(VectorInBloomFilterColDynamicValue.class); - - protected int colNum; - protected DynamicValue bloomFilterDynamicValue; - protected transient boolean initialized = false; - protected transient BloomFilter bloomFilter; - protected transient BloomFilterCheck bfCheck; - - public VectorInBloomFilterColDynamicValue(int colNum, DynamicValue bloomFilterDynamicValue) { - this.colNum = colNum; - this.bloomFilterDynamicValue = bloomFilterDynamicValue; - } - - public VectorInBloomFilterColDynamicValue() { - } - - @Override - public void init(Configuration conf) { - super.init(conf); - bloomFilterDynamicValue.setConf(conf); - - // Instantiate BloomFilterCheck based on input column type - VectorExpression.Type colType = this.getInputTypes()[0]; - switch (colType) { - case LONG: - case DATE: - bfCheck = new LongBloomFilterCheck(); - break; - case DOUBLE: - bfCheck = new DoubleBloomFilterCheck(); - break; - case DECIMAL: - bfCheck = new DecimalBloomFilterCheck(); - break; - case STRING: - case CHAR: - case VARCHAR: - case BINARY: - bfCheck = new BytesBloomFilterCheck(); - break; - case TIMESTAMP: - bfCheck = new TimestampBloomFilterCheck(); - break; - default: - throw new IllegalStateException("Unsupported type " + colType); - } - } - - private void initValue() { - try { - Object val = bloomFilterDynamicValue.getValue(); - if (val != null) { - BinaryObjectInspector boi = (BinaryObjectInspector) bloomFilterDynamicValue.getObjectInspector(); - byte[] bytes = boi.getPrimitiveJavaObject(val); - bloomFilter = BloomFilter.deserialize(new ByteArrayInputStream(bytes)); - } else { - bloomFilter = null; - } - initialized = true; - } catch (Exception err) { - throw new RuntimeException(err); - } - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - if (!initialized) { - 
initValue(); - } - - ColumnVector inputColVector = batch.cols[colNum]; - int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - // In case the dynamic value resolves to a null value - if (bloomFilter == null) { - batch.size = 0; - } - - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!(bfCheck.checkValue(inputColVector, 0))) { - - //Entire batch is filtered out. - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j=0; j != n; j++) { - int i = sel[j]; - if (bfCheck.checkValue(inputColVector, i)) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (bfCheck.checkValue(inputColVector, i)) { - sel[newSize++] = i; - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { - if (!(bfCheck.checkValue(inputColVector, 0))) { - - //Entire batch is filtered out. - batch.size = 0; - } - } else { - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j=0; j != n; j++) { - int i = sel[j]; - if (!nullPos[i]) { - if (bfCheck.checkValue(inputColVector, i)) { - sel[newSize++] = i; - } - } - } - - //Change the selected vector - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos[i]) { - if (bfCheck.checkValue(inputColVector, i)) { - sel[newSize++] = i; - } - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - } - - @Override - public int getOutputColumn() { - return -1; - } - - @Override - public String getOutputType() { - return "boolean"; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - @Override - public Descriptor getDescriptor() { - VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); - b.setMode(VectorExpressionDescriptor.Mode.FILTER) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.ALL_FAMILY, - VectorExpressionDescriptor.ArgumentType.BINARY) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.DYNAMICVALUE); - return b.build(); - } - - // Type-specific handling - abstract class BloomFilterCheck { - abstract public boolean checkValue(ColumnVector columnVector, int idx); - } - - class BytesBloomFilterCheck extends BloomFilterCheck { - @Override - public boolean checkValue(ColumnVector columnVector, int idx) { - BytesColumnVector col = (BytesColumnVector) columnVector; - return bloomFilter.testBytes(col.vector[idx], col.start[idx], col.length[idx]); - } - } - - class LongBloomFilterCheck extends BloomFilterCheck { - @Override - public boolean checkValue(ColumnVector columnVector, int idx) { - LongColumnVector col = (LongColumnVector) columnVector; - return bloomFilter.testLong(col.vector[idx]); - } - } - - class DoubleBloomFilterCheck extends BloomFilterCheck { - @Override - public boolean checkValue(ColumnVector columnVector, int idx) { - DoubleColumnVector col = (DoubleColumnVector) columnVector; - return 
bloomFilter.testDouble(col.vector[idx]); - } - } - - class DecimalBloomFilterCheck extends BloomFilterCheck { - private byte[] scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES]; - - @Override - public boolean checkValue(ColumnVector columnVector, int idx) { - DecimalColumnVector col = (DecimalColumnVector) columnVector; - int startIdx = col.vector[idx].toBytes(scratchBuffer); - return bloomFilter.testBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx); - } - } - - class TimestampBloomFilterCheck extends BloomFilterCheck { - @Override - public boolean checkValue(ColumnVector columnVector, int idx) { - TimestampColumnVector col = (TimestampColumnVector) columnVector; - return bloomFilter.testLong(col.time[idx]); - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java deleted file mode 100644 index 3ecb82e..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java +++ /dev/null @@ -1,474 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.Arrays; - -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression.AggregationBuffer; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.AggregationDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator; -import org.apache.hadoop.hive.ql.util.JavaDataModel; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.hive.common.util.BloomFilter; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class VectorUDAFBloomFilter extends VectorAggregateExpression { - - private static final Logger LOG = LoggerFactory.getLogger(VectorUDAFBloomFilter.class); - - private static final long serialVersionUID = 1L; - - private VectorExpression inputExpression; - private long expectedEntries = -1; - private ValueProcessor valueProcessor; - transient private int bitSetSize = -1; - transient private BytesWritable bw = new BytesWritable(); - transient private ByteArrayOutputStream byteStream = new ByteArrayOutputStream(); - - /** - * class for storing the current aggregate value. 
- */ - private static final class Aggregation implements AggregationBuffer { - private static final long serialVersionUID = 1L; - - BloomFilter bf; - - public Aggregation(long expectedEntries) { - bf = new BloomFilter(expectedEntries); - } - - @Override - public int getVariableSize() { - throw new UnsupportedOperationException(); - } - - @Override - public void reset() { - bf.reset(); - } - } - - public VectorUDAFBloomFilter(VectorExpression inputExpression) { - this(); - this.inputExpression = inputExpression; - - // Instantiate the ValueProcessor based on the input type - VectorExpressionDescriptor.ArgumentType inputType = - VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(inputExpression.getOutputType()); - switch (inputType) { - case INT_FAMILY: - case DATE: - valueProcessor = new ValueProcessorLong(); - break; - case FLOAT_FAMILY: - valueProcessor = new ValueProcessorDouble(); - break; - case DECIMAL: - valueProcessor = new ValueProcessorDecimal(); - break; - case STRING: - case CHAR: - case VARCHAR: - case STRING_FAMILY: - case BINARY: - valueProcessor = new ValueProcessorBytes(); - break; - case TIMESTAMP: - valueProcessor = new ValueProcessorTimestamp(); - break; - default: - throw new IllegalStateException("Unsupported type " + inputType); - } - } - - public VectorUDAFBloomFilter() { - super(); - } - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException { - if (expectedEntries < 0) { - throw new IllegalStateException("expectedEntries not initialized"); - } - return new Aggregation(expectedEntries); - } - - @Override - public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) - throws HiveException { - - inputExpression.evaluate(batch); - - ColumnVector inputColumn = batch.cols[this.inputExpression.getOutputColumn()]; - - int batchSize = batch.size; - - if (batchSize == 0) { - return; - } - - Aggregation myagg = (Aggregation) agg; - - if (inputColumn.isRepeating) { - if (inputColumn.noNulls) { - valueProcessor.processValue(myagg, inputColumn, 0); - } - return; - } - - if (!batch.selectedInUse && inputColumn.noNulls) { - iterateNoSelectionNoNulls(myagg, inputColumn, batchSize); - } - else if (!batch.selectedInUse) { - iterateNoSelectionHasNulls(myagg, inputColumn, batchSize); - } - else if (inputColumn.noNulls){ - iterateSelectionNoNulls(myagg, inputColumn, batchSize, batch.selected); - } - else { - iterateSelectionHasNulls(myagg, inputColumn, batchSize, batch.selected); - } - } - - private void iterateNoSelectionNoNulls( - Aggregation myagg, - ColumnVector inputColumn, - int batchSize) { - for (int i=0; i< batchSize; ++i) { - valueProcessor.processValue(myagg, inputColumn, i); - } - } - - private void iterateNoSelectionHasNulls( - Aggregation myagg, - ColumnVector inputColumn, - int batchSize) { - - for (int i=0; i< batchSize; ++i) { - if (!inputColumn.isNull[i]) { - valueProcessor.processValue(myagg, inputColumn, i); - } - } - } - - private void iterateSelectionNoNulls( - Aggregation myagg, - ColumnVector inputColumn, - int batchSize, - int[] selected) { - - for (int j=0; j< batchSize; ++j) { - int i = selected[j]; - valueProcessor.processValue(myagg, inputColumn, i); - } - } - - private void iterateSelectionHasNulls( - Aggregation myagg, - ColumnVector inputColumn, - int batchSize, - int[] selected) { - - for (int j=0; j< batchSize; ++j) { - int i = selected[j]; - if (!inputColumn.isNull[i]) { - valueProcessor.processValue(myagg, inputColumn, i); - } - } - } - - @Override - public void aggregateInputSelection( - 
VectorAggregationBufferRow[] aggregationBufferSets, int aggregateIndex, - VectorizedRowBatch batch) throws HiveException { - - int batchSize = batch.size; - - if (batchSize == 0) { - return; - } - - inputExpression.evaluate(batch); - - ColumnVector inputColumn = batch.cols[this.inputExpression.getOutputColumn()]; - - if (inputColumn.noNulls) { - if (inputColumn.isRepeating) { - iterateNoNullsRepeatingWithAggregationSelection( - aggregationBufferSets, aggregateIndex, - inputColumn, batchSize); - } else { - if (batch.selectedInUse) { - iterateNoNullsSelectionWithAggregationSelection( - aggregationBufferSets, aggregateIndex, - inputColumn, batch.selected, batchSize); - } else { - iterateNoNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, - inputColumn, batchSize); - } - } - } else { - if (inputColumn.isRepeating) { - // All nulls, no-op for min/max - } else { - if (batch.selectedInUse) { - iterateHasNullsSelectionWithAggregationSelection( - aggregationBufferSets, aggregateIndex, - inputColumn, batchSize, batch.selected); - } else { - iterateHasNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, - inputColumn, batchSize); - } - } - } - } - - private void iterateNoNullsRepeatingWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, - ColumnVector inputColumn, - int batchSize) { - - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregrateIndex, - i); - valueProcessor.processValue(myagg, inputColumn, 0); - } - } - - private void iterateNoNullsSelectionWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, - ColumnVector inputColumn, - int[] selection, - int batchSize) { - - for (int i=0; i < batchSize; ++i) { - int row = selection[i]; - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregrateIndex, - i); - valueProcessor.processValue(myagg, inputColumn, row); - } - } - - private void iterateNoNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, - ColumnVector inputColumn, - int batchSize) { - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregrateIndex, - i); - valueProcessor.processValue(myagg, inputColumn, i); - } - } - - private void iterateHasNullsSelectionWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, - ColumnVector inputColumn, - int batchSize, - int[] selection) { - - for (int i=0; i < batchSize; ++i) { - int row = selection[i]; - if (!inputColumn.isNull[row]) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregrateIndex, - i); - valueProcessor.processValue(myagg, inputColumn, i); - } - } - } - - private void iterateHasNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, - ColumnVector inputColumn, - int batchSize) { - - for (int i=0; i < batchSize; ++i) { - if (!inputColumn.isNull[i]) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregrateIndex, - i); - valueProcessor.processValue(myagg, inputColumn, i); - } - } - } - - private Aggregation getCurrentAggregationBuffer( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, - int row) { - VectorAggregationBufferRow mySet = aggregationBufferSets[row]; - Aggregation myagg = (Aggregation) 
mySet.getAggregationBuffer(aggregrateIndex); - return myagg; - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException { - agg.reset(); - } - - @Override - public Object evaluateOutput(AggregationBuffer agg) throws HiveException { - try { - Aggregation bfAgg = (Aggregation) agg; - byteStream.reset(); - BloomFilter.serialize(byteStream, bfAgg.bf); - byte[] bytes = byteStream.toByteArray(); - bw.set(bytes, 0, bytes.length); - return bw; - } catch (IOException err) { - throw new HiveException("Error encountered while serializing bloomfilter", err); - } - } - - @Override - public ObjectInspector getOutputObjectInspector() { - return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; - } - - @Override - public int getAggregationBufferFixedSize() { - if (bitSetSize < 0) { - // Not pretty, but we need a way to get the size - try { - Aggregation agg = (Aggregation) getNewAggregationBuffer(); - bitSetSize = agg.bf.getBitSet().length; - } catch (Exception e) { - throw new RuntimeException("Unexpected error while creating AggregationBuffer", e); - } - } - - // BloomFilter: object(BitSet: object(data: long[]), numBits: int, numHashFunctions: int) - JavaDataModel model = JavaDataModel.get(); - int bloomFilterSize = JavaDataModel.alignUp(model.object() + model.lengthForLongArrayOfSize(bitSetSize), - model.memoryAlign()); - return JavaDataModel.alignUp( - model.object() + bloomFilterSize + model.primitive1() + model.primitive1(), - model.memoryAlign()); - } - - @Override - public void init(AggregationDesc desc) throws HiveException { - GenericUDAFBloomFilterEvaluator udafBloomFilter = - (GenericUDAFBloomFilterEvaluator) desc.getGenericUDAFEvaluator(); - expectedEntries = udafBloomFilter.getExpectedEntries(); - } - - public VectorExpression getInputExpression() { - return inputExpression; - } - - public void setInputExpression(VectorExpression inputExpression) { - this.inputExpression = inputExpression; - } - - public long getExpectedEntries() { - return expectedEntries; - } - - public void setExpectedEntries(long expectedEntries) { - this.expectedEntries = expectedEntries; - } - - // Type-specific handling done here - private static abstract class ValueProcessor { - abstract protected void processValue(Aggregation myagg, ColumnVector inputColumn, int index); - } - - // - // Type-specific implementations - // - - public static class ValueProcessorBytes extends ValueProcessor { - @Override - protected void processValue(Aggregation myagg, ColumnVector columnVector, int i) { - BytesColumnVector inputColumn = (BytesColumnVector) columnVector; - myagg.bf.addBytes(inputColumn.vector[i], inputColumn.start[i], inputColumn.length[i]); - } - } - - public static class ValueProcessorLong extends ValueProcessor { - @Override - protected void processValue(Aggregation myagg, ColumnVector columnVector, int i) { - LongColumnVector inputColumn = (LongColumnVector) columnVector; - myagg.bf.addLong(inputColumn.vector[i]); - } - } - - public static class ValueProcessorDouble extends ValueProcessor { - @Override - protected void processValue(Aggregation myagg, ColumnVector columnVector, int i) { - DoubleColumnVector inputColumn = (DoubleColumnVector) columnVector; - myagg.bf.addDouble(inputColumn.vector[i]); - } - } - - public static class ValueProcessorDecimal extends ValueProcessor { - private byte[] scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES]; - - @Override - protected void processValue(Aggregation myagg, ColumnVector columnVector, int i) { - DecimalColumnVector 
inputColumn = (DecimalColumnVector) columnVector; - int startIdx = inputColumn.vector[i].toBytes(scratchBuffer); - myagg.bf.addBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx); - } - } - - public static class ValueProcessorTimestamp extends ValueProcessor { - @Override - protected void processValue(Aggregation myagg, ColumnVector columnVector, int i) { - TimestampColumnVector inputColumn = (TimestampColumnVector) columnVector; - myagg.bf.addLong(inputColumn.time[i]); - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java deleted file mode 100644 index ad190b7..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java +++ /dev/null @@ -1,365 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates; - -import java.io.ByteArrayOutputStream; -import java.util.Arrays; - -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression.AggregationBuffer; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.AggregationDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hive.common.util.BloomFilter; - -public class VectorUDAFBloomFilterMerge extends VectorAggregateExpression { - - private static final long serialVersionUID = 1L; - - private VectorExpression inputExpression; - private long expectedEntries = -1; - transient private int aggBufferSize = -1; - transient private BytesWritable bw = new BytesWritable(); - - /** - * class for storing the current aggregate value. 
- */ - private static final class Aggregation implements AggregationBuffer { - private static final long serialVersionUID = 1L; - - byte[] bfBytes; - - public Aggregation(long expectedEntries) { - try { - BloomFilter bf = new BloomFilter(expectedEntries); - ByteArrayOutputStream bytesOut = new ByteArrayOutputStream(); - BloomFilter.serialize(bytesOut, bf); - bfBytes = bytesOut.toByteArray(); - } catch (Exception err) { - throw new IllegalArgumentException("Error creating aggregation buffer", err); - } - } - - @Override - public int getVariableSize() { - throw new UnsupportedOperationException(); - } - - @Override - public void reset() { - // Do not change the initial bytes which contain NumHashFunctions/NumBits! - Arrays.fill(bfBytes, BloomFilter.START_OF_SERIALIZED_LONGS, bfBytes.length, (byte) 0); - } - } - - public VectorUDAFBloomFilterMerge(VectorExpression inputExpression) { - this(); - this.inputExpression = inputExpression; - } - - public VectorUDAFBloomFilterMerge() { - super(); - } - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException { - if (expectedEntries < 0) { - throw new IllegalStateException("expectedEntries not initialized"); - } - return new Aggregation(expectedEntries); - } - - @Override - public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) - throws HiveException { - - inputExpression.evaluate(batch); - - ColumnVector inputColumn = batch.cols[this.inputExpression.getOutputColumn()]; - - int batchSize = batch.size; - - if (batchSize == 0) { - return; - } - - Aggregation myagg = (Aggregation) agg; - - if (inputColumn.isRepeating) { - if (inputColumn.noNulls) { - processValue(myagg, inputColumn, 0); - } - return; - } - - if (!batch.selectedInUse && inputColumn.noNulls) { - iterateNoSelectionNoNulls(myagg, inputColumn, batchSize); - } - else if (!batch.selectedInUse) { - iterateNoSelectionHasNulls(myagg, inputColumn, batchSize); - } - else if (inputColumn.noNulls){ - iterateSelectionNoNulls(myagg, inputColumn, batchSize, batch.selected); - } - else { - iterateSelectionHasNulls(myagg, inputColumn, batchSize, batch.selected); - } - } - - private void iterateNoSelectionNoNulls( - Aggregation myagg, - ColumnVector inputColumn, - int batchSize) { - for (int i=0; i< batchSize; ++i) { - processValue(myagg, inputColumn, i); - } - } - - private void iterateNoSelectionHasNulls( - Aggregation myagg, - ColumnVector inputColumn, - int batchSize) { - - for (int i=0; i< batchSize; ++i) { - if (!inputColumn.isNull[i]) { - processValue(myagg, inputColumn, i); - } - } - } - - private void iterateSelectionNoNulls( - Aggregation myagg, - ColumnVector inputColumn, - int batchSize, - int[] selected) { - - for (int j=0; j< batchSize; ++j) { - int i = selected[j]; - processValue(myagg, inputColumn, i); - } - } - - private void iterateSelectionHasNulls( - Aggregation myagg, - ColumnVector inputColumn, - int batchSize, - int[] selected) { - - for (int j=0; j< batchSize; ++j) { - int i = selected[j]; - if (!inputColumn.isNull[i]) { - processValue(myagg, inputColumn, i); - } - } - } - - @Override - public void aggregateInputSelection( - VectorAggregationBufferRow[] aggregationBufferSets, int aggregateIndex, - VectorizedRowBatch batch) throws HiveException { - - int batchSize = batch.size; - - if (batchSize == 0) { - return; - } - - inputExpression.evaluate(batch); - - ColumnVector inputColumn = batch.cols[this.inputExpression.getOutputColumn()]; - - if (inputColumn.noNulls) { - if (inputColumn.isRepeating) { - 
iterateNoNullsRepeatingWithAggregationSelection( - aggregationBufferSets, aggregateIndex, - inputColumn, batchSize); - } else { - if (batch.selectedInUse) { - iterateNoNullsSelectionWithAggregationSelection( - aggregationBufferSets, aggregateIndex, - inputColumn, batch.selected, batchSize); - } else { - iterateNoNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, - inputColumn, batchSize); - } - } - } else { - if (inputColumn.isRepeating) { - // All nulls, no-op for min/max - } else { - if (batch.selectedInUse) { - iterateHasNullsSelectionWithAggregationSelection( - aggregationBufferSets, aggregateIndex, - inputColumn, batchSize, batch.selected); - } else { - iterateHasNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, - inputColumn, batchSize); - } - } - } - } - - private void iterateNoNullsRepeatingWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, - ColumnVector inputColumn, - int batchSize) { - - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregrateIndex, - i); - processValue(myagg, inputColumn, 0); - } - } - - private void iterateNoNullsSelectionWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, - ColumnVector inputColumn, - int[] selection, - int batchSize) { - - for (int i=0; i < batchSize; ++i) { - int row = selection[i]; - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregrateIndex, - i); - processValue(myagg, inputColumn, row); - } - } - - private void iterateNoNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, - ColumnVector inputColumn, - int batchSize) { - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregrateIndex, - i); - processValue(myagg, inputColumn, i); - } - } - - private void iterateHasNullsSelectionWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, - ColumnVector inputColumn, - int batchSize, - int[] selection) { - - for (int i=0; i < batchSize; ++i) { - int row = selection[i]; - if (!inputColumn.isNull[row]) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregrateIndex, - i); - processValue(myagg, inputColumn, i); - } - } - } - - private void iterateHasNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, - ColumnVector inputColumn, - int batchSize) { - - for (int i=0; i < batchSize; ++i) { - if (!inputColumn.isNull[i]) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregrateIndex, - i); - processValue(myagg, inputColumn, i); - } - } - } - - private Aggregation getCurrentAggregationBuffer( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, - int row) { - VectorAggregationBufferRow mySet = aggregationBufferSets[row]; - Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregrateIndex); - return myagg; - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException { - agg.reset(); - } - - @Override - public Object evaluateOutput(AggregationBuffer agg) throws HiveException { - Aggregation bfAgg = (Aggregation) agg; - bw.set(bfAgg.bfBytes, 0, bfAgg.bfBytes.length); - return bw; - } - - @Override - public ObjectInspector getOutputObjectInspector() { - return 
PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; - } - - @Override - public int getAggregationBufferFixedSize() { - if (aggBufferSize < 0) { - // Not pretty, but we need a way to get the size - try { - Aggregation agg = (Aggregation) getNewAggregationBuffer(); - aggBufferSize = agg.bfBytes.length; - } catch (Exception e) { - throw new RuntimeException("Unexpected error while creating AggregationBuffer", e); - } - } - - return aggBufferSize; - } - - @Override - public void init(AggregationDesc desc) throws HiveException { - GenericUDAFBloomFilterEvaluator udafBloomFilter = - (GenericUDAFBloomFilterEvaluator) desc.getGenericUDAFEvaluator(); - expectedEntries = udafBloomFilter.getExpectedEntries(); - } - - void processValue(Aggregation myagg, ColumnVector columnVector, int i) { - // columnVector entry is byte array representing serialized BloomFilter. - // BloomFilter.mergeBloomFilterBytes() does a simple byte ORing - // which should be faster than deserialize/merge. - BytesColumnVector inputColumn = (BytesColumnVector) columnVector; - BloomFilter.mergeBloomFilterBytes(myagg.bfBytes, 0, myagg.bfBytes.length, - inputColumn.vector[i], inputColumn.start[i], inputColumn.length[i]); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcRowGroupCountPrinter.java ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcRowGroupCountPrinter.java deleted file mode 100644 index 18ef325..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcRowGroupCountPrinter.java +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.hooks; - -import java.util.List; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.llap.counters.LlapIOCounters; -import org.apache.hadoop.hive.ql.QueryPlan; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.exec.tez.TezTask; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.tez.common.counters.CounterGroup; -import org.apache.tez.common.counters.TezCounter; -import org.apache.tez.common.counters.TezCounters; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Post execution hook to print number of ORC row groups read from the counter. Used for Predicate Pushdown testing. - */ -public class PostExecOrcRowGroupCountPrinter implements ExecuteWithHookContext { - private static final Logger LOG = LoggerFactory.getLogger(PostExecOrcRowGroupCountPrinter.class.getName()); - - @Override - public void run(HookContext hookContext) throws Exception { - assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK); - HiveConf conf = hookContext.getConf(); - if (!"tez".equals(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE))) { - return; - } - - LOG.info("Executing post execution hook to print ORC row groups read counter.."); - SessionState ss = SessionState.get(); - SessionState.LogHelper console = ss.getConsole(); - QueryPlan plan = hookContext.getQueryPlan(); - if (plan == null) { - return; - } - - List rootTasks = Utilities.getTezTasks(plan.getRootTasks()); - for (TezTask tezTask : rootTasks) { - LOG.info("Printing ORC row group counter for tez task: " + tezTask.getName()); - TezCounters counters = tezTask.getTezCounters(); - if (counters != null) { - for (CounterGroup group : counters) { - if (group.getName().equals(LlapIOCounters.class.getName())) { - console.printError(tezTask.getId() + " LLAP IO COUNTERS:"); - for (TezCounter counter : group) { - if (counter.getDisplayName().equals(LlapIOCounters.SELECTED_ROWGROUPS.name())) { - console.printError(" " + counter.getDisplayName() + ": " + counter.getValue()); - } - } - } - } - } - } - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 0f9e86b..beed6b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -43,15 +43,11 @@ import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.SelectOperator; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.parse.GenTezUtils; import org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext; -import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import 
org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc; import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc; @@ -721,7 +717,6 @@ public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcCo Operator parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition); if (parentBigTableOp instanceof ReduceSinkOperator) { - Operator parentSelectOpOfBigTableOp = parentBigTableOp.getParentOperators().get(0); if (removeReduceSink) { for (Operator p : parentBigTableOp.getParentOperators()) { // we might have generated a dynamic partition operator chain. Since @@ -764,65 +759,11 @@ public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcCo } op.getChildOperators().remove(joinOp); } - - // Remove semijoin Op if there is any. - if (context.parseContext.getRsOpToTsOpMap().size() > 0) { - removeCycleCreatingSemiJoinOps(mapJoinOp, parentSelectOpOfBigTableOp, - context.parseContext); - } } return mapJoinOp; } - // Remove any semijoin branch associated with mapjoin's parent's operator - // pipeline which can cause a cycle after mapjoin optimization. - private void removeCycleCreatingSemiJoinOps(MapJoinOperator mapjoinOp, - Operator parentSelectOpOfBigTable, - ParseContext parseContext) throws SemanticException { - boolean semiJoinCycle = false; - ReduceSinkOperator rs = null; - TableScanOperator ts = null; - for (Operator op : parentSelectOpOfBigTable.getChildOperators()) { - if (!(op instanceof SelectOperator)) { - continue; - } - - while (op.getChildOperators().size() > 0 ) { - op = op.getChildOperators().get(0); - if (!(op instanceof ReduceSinkOperator)) { - continue; - } - rs = (ReduceSinkOperator) op; - ts = parseContext.getRsOpToTsOpMap().get(rs); - if (ts == null) { - continue; - } - for (Operator parent : mapjoinOp.getParentOperators()) { - if (!(parent instanceof ReduceSinkOperator)) { - continue; - } - - Set tsOps = OperatorUtils.findOperatorsUpstream(parent, - TableScanOperator.class); - for (TableScanOperator parentTS : tsOps) { - // If the parent is same as the ts, then we have a cycle. - if (ts == parentTS) { - semiJoinCycle = true; - break; - } - } - } - } - } - - // By design there can be atmost 1 such cycle. 
- if (semiJoinCycle) { - GenTezUtils.removeBranch(rs); - GenTezUtils.removeSemiJoinOperator(parseContext, rs, ts); - } - } - private AppMasterEventOperator findDynamicPartitionBroadcast(Operator parent) { for (Operator op : parent.getChildOperators()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java index 3a1897f..e4f3057 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java @@ -145,25 +145,25 @@ public HiveSubQRemoveRelBuilder(Context context, RelOptCluster cluster, } this.aggregateFactory = Util.first(context.unwrap(RelFactories.AggregateFactory.class), - HiveRelFactories.HIVE_AGGREGATE_FACTORY); + RelFactories.DEFAULT_AGGREGATE_FACTORY); this.filterFactory = Util.first(context.unwrap(RelFactories.FilterFactory.class), HiveRelFactories.HIVE_FILTER_FACTORY); this.projectFactory = Util.first(context.unwrap(RelFactories.ProjectFactory.class), - HiveRelFactories.HIVE_PROJECT_FACTORY); + RelFactories.DEFAULT_PROJECT_FACTORY); this.sortFactory = Util.first(context.unwrap(RelFactories.SortFactory.class), - HiveRelFactories.HIVE_SORT_FACTORY); + RelFactories.DEFAULT_SORT_FACTORY); this.setOpFactory = Util.first(context.unwrap(RelFactories.SetOpFactory.class), - HiveRelFactories.HIVE_SET_OP_FACTORY); + RelFactories.DEFAULT_SET_OP_FACTORY); this.joinFactory = Util.first(context.unwrap(RelFactories.JoinFactory.class), - HiveRelFactories.HIVE_JOIN_FACTORY); + RelFactories.DEFAULT_JOIN_FACTORY); this.semiJoinFactory = Util.first(context.unwrap(RelFactories.SemiJoinFactory.class), - HiveRelFactories.HIVE_SEMI_JOIN_FACTORY); + RelFactories.DEFAULT_SEMI_JOIN_FACTORY); this.correlateFactory = Util.first(context.unwrap(RelFactories.CorrelateFactory.class), RelFactories.DEFAULT_CORRELATE_FACTORY); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java index 9faccd7..009d9e5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -56,8 +56,6 @@ import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.Statistics; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -393,11 +391,6 @@ private void updateColStats(Set projIndxLst, boolean allowNullColumnFor noColsMissingStats.getAndAdd(colNamesFailedStats.size()); if (allowNullColumnForMissingStats) { LOG.warn(logMsg); - HiveConf conf = SessionState.getSessionConf(); - if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_SHOW_WARNINGS)) { - LogHelper console = SessionState.getConsole(); - console.printInfoNoLog(logMsg); - } } else { LOG.error(logMsg); throw new RuntimeException(logMsg); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java index 6efc731..a1b5aeb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java @@ -29,7 +29,6 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; @@ -137,13 +136,6 @@ public FilterReduceExpressionsRule(Class filterClass, newConditionExp)) { newConditionExp = ((RexCall) newConditionExp).getOperands().get(0); } - // reduce might end up creating an expression with null type - // e.g condition(null = null) is reduced to condition (null) with null type - // since this is a condition which will always be boolean type we cast it to - // boolean type - if(newConditionExp.getType().getSqlTypeName() == SqlTypeName.NULL) { - newConditionExp = call.builder().cast(newConditionExp, SqlTypeName.BOOLEAN); - } call.transformTo(call.builder(). push(filter.getInput()).filter(newConditionExp).build()); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java index 9c26801..5ab36db 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -70,7 +70,6 @@ import org.apache.calcite.sql.fun.SqlCountAggFunction; import org.apache.calcite.sql.fun.SqlSingleValueAggFunction; import org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.Bug; import org.apache.calcite.util.Holder; @@ -908,7 +907,7 @@ public Frame decorrelateRel(HiveProject rel) throws SemanticException{ newPos++; } - RelNode newProject = HiveProject.create(frame.r, Pair.left(projects), SqlValidatorUtil.uniquify(Pair.right(projects))); + RelNode newProject = HiveProject.create(frame.r, Pair.left(projects), Pair.right(projects)); return register(rel, newProject, mapOldToNewOutputPos, mapCorVarToOutputPos); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index c1768f4..564ef7a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -250,34 +250,6 @@ protected RexNode apply(RexSubQuery e, Set variablesSet, switch (e.getKind()) { case IN: fields.addAll(builder.fields()); - // Transformation: sq_count_check(count(*), true) FILTER is generated on top - // of subquery which is then joined (LEFT or INNER) with outer query - // This transformation is done to add run time check using sq_count_check to - // throw an error if subquery is producing zero row, since with aggregate this - // will produce wrong results (because we further rewrite such queries into JOIN) - if(isCorrScalarAgg) { - // returns single row/column - builder.aggregate(builder.groupKey(), - builder.count(false, "cnt_in")); - - if (!variablesSet.isEmpty()) { - builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); - } else { - builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); - } - - SqlFunction 
inCountCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, - InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION); - - // we create FILTER (sq_count_check(count()) > 0) instead of PROJECT because RelFieldTrimmer - // ends up getting rid of Project since it is not used further up the tree - builder.filter(builder.call(SqlStdOperatorTable.GREATER_THAN, - //true here indicates that sq_count_check is for IN/NOT IN subqueries - builder.call(inCountCheck, builder.field("cnt_in"), builder.literal(true)), - builder.literal(0))); - offset = offset + 1; - builder.push(e.rel); - } } // First, the cross join @@ -312,14 +284,14 @@ protected RexNode apply(RexSubQuery e, Set variablesSet, switch (logic) { case TRUE: if (fields.isEmpty()) { - builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId())); + builder.project(builder.alias(builder.literal(true), "i")); builder.aggregate(builder.groupKey(0)); } else { builder.aggregate(builder.groupKey(fields)); } break; default: - fields.add(builder.alias(builder.literal(true), "i" + e.rel.getId())); + fields.add(builder.alias(builder.literal(true), "i")); builder.project(fields); builder.distinct(); } @@ -359,7 +331,7 @@ protected RexNode apply(RexSubQuery e, Set variablesSet, operands.add((builder.isNull(builder.field("ct", "c"))), builder.literal(false)); break; } - operands.add(builder.isNotNull(builder.field("dt", "i" + e.rel.getId())), + operands.add(builder.isNotNull(builder.field("dt", "i")), builder.literal(true)); if (!keyIsNulls.isEmpty()) { //Calcite creates null literal with Null type here but because HIVE doesn't support null type diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 439950b..e3d9d7f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -335,7 +335,6 @@ public Vectorizer() { supportedGenericUDFs.add(GenericUDFNvl.class); supportedGenericUDFs.add(GenericUDFElt.class); supportedGenericUDFs.add(GenericUDFInitCap.class); - supportedGenericUDFs.add(GenericUDFInBloomFilter.class); // For type casts supportedGenericUDFs.add(UDFToLong.class); @@ -369,7 +368,6 @@ public Vectorizer() { supportedAggregationUdfs.add("stddev"); supportedAggregationUdfs.add("stddev_pop"); supportedAggregationUdfs.add("stddev_samp"); - supportedAggregationUdfs.add("bloom_filter"); } private class VectorTaskColumnInfo { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index ce435f9..0f472e7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -1361,34 +1361,30 @@ private void analyzeAlterTableProps(String[] qualified, HashMap HashMap mapProp = getProps((ASTNode) (ast.getChild(0)) .getChild(0)); EnvironmentContext environmentContext = null; - // we need to check if the properties are valid, especially for stats. - // they might be changed via alter table .. update statistics or - // alter table .. set tblproperties. 
If the property is not row_count - // or raw_data_size, it could not be changed through update statistics - boolean changeStatsSucceeded = false; - for (Entry entry : mapProp.entrySet()) { - // we make sure that we do not change anything if there is anything - // wrong. - if (entry.getKey().equals(StatsSetupConst.ROW_COUNT) - || entry.getKey().equals(StatsSetupConst.RAW_DATA_SIZE)) { - try { - Long.parseLong(entry.getValue()); - changeStatsSucceeded = true; - } catch (Exception e) { - throw new SemanticException("AlterTable " + entry.getKey() + " failed with value " - + entry.getValue()); - } - } else { - if (queryState.getCommandType() - .equals(HiveOperation.ALTERTABLE_UPDATETABLESTATS.getOperationName()) - || queryState.getCommandType() - .equals(HiveOperation.ALTERTABLE_UPDATEPARTSTATS.getOperationName())) { + if (queryState.getCommandType() + .equals(HiveOperation.ALTERTABLE_UPDATETABLESTATS.getOperationName()) + || queryState.getCommandType() + .equals(HiveOperation.ALTERTABLE_UPDATEPARTSTATS.getOperationName())) { + // we need to check if the properties are valid, especially for stats. + boolean changeStatsSucceeded = false; + for (Entry entry : mapProp.entrySet()) { + // we make sure that we do not change anything if there is anything + // wrong. + if (entry.getKey().equals(StatsSetupConst.ROW_COUNT) + || entry.getKey().equals(StatsSetupConst.RAW_DATA_SIZE)) { + try { + Long.parseLong(entry.getValue()); + changeStatsSucceeded = true; + } catch (Exception e) { + throw new SemanticException("AlterTable " + entry.getKey() + " failed with value " + + entry.getValue()); + } + } else { throw new SemanticException("AlterTable UpdateStats " + entry.getKey() - + " failed because the only valid keys are " + StatsSetupConst.ROW_COUNT + " and " + + " failed because the only valid keys are" + StatsSetupConst.ROW_COUNT + " and " + StatsSetupConst.RAW_DATA_SIZE); } } - if (changeStatsSucceeded) { environmentContext = new EnvironmentContext(); environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.USER); @@ -3028,9 +3024,7 @@ private void analyzeMetastoreCheck(CommonTree ast) throws SemanticException { tableName = getUnescapedName((ASTNode) ast.getChild(1)); } } - Table tab = getTable(tableName); - List> specs = getPartitionSpecs(tab, ast); - outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED)); + List> specs = getPartitionSpecs(getTable(tableName), ast); MsckDesc checkDesc = new MsckDesc(tableName, specs, ctx.getResFile(), repair); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java index 796ccc8..34e53d2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java @@ -31,6 +31,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileChecksum; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; @@ -77,6 +78,7 @@ public static final String METADATA_NAME = "_metadata"; public static final String FILES_NAME = "_files"; public static final String DATA_PATH_NAME = "data"; + public static final String URI_FRAGMENT_SEPARATOR = "#"; private static final Logger LOG = LoggerFactory.getLogger(EximUtil.class); @@ -572,4 +574,19 @@ public boolean accept(Path p) { }; } + public static String 
getCMEncodedFileName(String fileURIStr, String fileChecksum) { + // The checksum is set as the fragment portion of the file uri + return fileURIStr + URI_FRAGMENT_SEPARATOR + fileChecksum; + } + + public static String getCMDecodedFileName(String encodedFileURIStr) { + String[] uriAndFragment = encodedFileURIStr.split(URI_FRAGMENT_SEPARATOR); + return uriAndFragment[0]; + } + + public static FileChecksum getCMDecodedChecksum(String encodedFileURIStr) { + // TODO: Implement this as part of HIVE-15490 + return null; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index aee74ad..6141391 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -27,7 +27,6 @@ import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; @@ -569,7 +568,7 @@ public static void removeSemiJoinOperator(ParseContext context, TypeInfoFactory.booleanTypeInfo, Boolean.TRUE); DynamicValuePredicateContext filterDynamicValuePredicatesCollection = new DynamicValuePredicateContext(); - collectDynamicValuePredicates(((FilterOperator)(ts.getChildOperators().get(0))).getConf().getPredicate(), + collectDynamicValuePredicates(ts.getConf().getFilterExpr(), filterDynamicValuePredicatesCollection); for (ExprNodeDesc nodeToRemove : filterDynamicValuePredicatesCollection .childParentMapping.keySet()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/PreInsertTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/parse/PreInsertTableDesc.java new file mode 100644 index 0000000..2c8e1e1 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PreInsertTableDesc.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.parse; + +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.DDLDesc; +import org.apache.hadoop.hive.ql.plan.Explain; + +@Explain(displayName = "Pre-Insert task", explainLevels = { Explain.Level.USER, Explain.Level.DEFAULT, Explain.Level.EXTENDED }) +public class PreInsertTableDesc extends DDLDesc { + private final boolean isOverwrite; + private final Table table; + + public PreInsertTableDesc(Table table, boolean overwrite) { + this.table = table; + this.isOverwrite = overwrite; + } + + public Table getTable() { + return table; + } + + public boolean isOverwrite() { + return isOverwrite; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java index 87ff581..7ca722a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java @@ -53,21 +53,11 @@ public static SubQueryType get(ASTNode opNode) throws SemanticException { } switch(opNode.getType()) { - // opNode's type is always either KW_EXISTS or KW_IN never NOTEXISTS or NOTIN - // to figure this out we need to check it's grand parent's parent case HiveParser.KW_EXISTS: - if(opNode.getParent().getParent().getParent() != null - && opNode.getParent().getParent().getParent().getType() == HiveParser.KW_NOT) { - return NOT_EXISTS; - } return EXISTS; case HiveParser.TOK_SUBQUERY_OP_NOTEXISTS: return NOT_EXISTS; case HiveParser.KW_IN: - if(opNode.getParent().getParent().getParent() != null - && opNode.getParent().getParent().getParent().getType() == HiveParser.KW_NOT) { - return NOT_IN; - } return IN; case HiveParser.TOK_SUBQUERY_OP_NOTIN: return NOT_IN; @@ -554,18 +544,13 @@ boolean subqueryRestrictionsCheck(RowResolver parentQueryRR, boolean hasAggreateExprs = false; boolean hasWindowing = false; - - // we need to know if aggregate is COUNT since IN corr subq with count aggregate - // is not special cased later in subquery remove rule - boolean hasCount = false; for(int i= selectExprStart; i < selectClause.getChildCount(); i++ ) { ASTNode selectItem = (ASTNode) selectClause.getChild(i); int r = SubQueryUtils.checkAggOrWindowing(selectItem); - hasWindowing = hasWindowing | ( r == 3); - hasAggreateExprs = hasAggreateExprs | ( r == 1 | r== 2 ); - hasCount = hasCount | ( r == 2 ); + hasWindowing = hasWindowing | ( r == 2); + hasAggreateExprs = hasAggreateExprs | ( r == 1 ); } @@ -617,47 +602,30 @@ boolean subqueryRestrictionsCheck(RowResolver parentQueryRR, * Specification doc for details. * Similarly a not exists on a SubQuery with a implied GBY will always return false. */ - // Following is special cases for different type of subqueries which have aggregate and no implicit group by - // and are correlatd - // * EXISTS/NOT EXISTS - NOT allowed, throw an error for now. We plan to allow this later - // * SCALAR - only allow if it has non equi join predicate. This should return true since later in subquery remove - // rule we need to know about this case. - // * IN - always allowed, BUT returns true for cases with aggregate other than COUNT since later in subquery remove - // rule we need to know about this case. 
- // * NOT IN - always allow, but always return true because later subq remove rule will generate diff plan for this case if (hasAggreateExprs && - noImplicityGby) { - - if(operator.getType() == SubQueryType.EXISTS - || operator.getType() == SubQueryType.NOT_EXISTS) { - if(hasCorrelation) { + noImplicityGby ) { + + if( hasCorrelation && (operator.getType() == SubQueryType.EXISTS + || operator.getType() == SubQueryType.NOT_EXISTS + || operator.getType() == SubQueryType.IN + || operator.getType() == SubQueryType.NOT_IN)) { + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + subQueryAST, + "A predicate on EXISTS/NOT EXISTS/IN/NOT IN SubQuery with implicit Aggregation(no Group By clause) " + + "cannot be rewritten.")); + } + else if(operator.getType() == SubQueryType.SCALAR && hasNonEquiJoinPred) { + // throw an error if predicates are not equal throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( subQueryAST, - "A predicate on EXISTS/NOT EXISTS SubQuery with implicit Aggregation(no Group By clause) " + - "cannot be rewritten.")); - } - } - else if(operator.getType() == SubQueryType.SCALAR) { - if(hasNonEquiJoinPred) { - throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - subQueryAST, - "Scalar subqueries with aggregate cannot have non-equi join predicate")); - } - if(hasCorrelation) { - return true; - } + "Scalar subqueries with aggregate cannot have non-equi join predicate")); } - else if(operator.getType() == SubQueryType.IN) { - if(hasCount && hasCorrelation) { + else if(operator.getType() == SubQueryType.SCALAR && hasCorrelation) { return true; - } - } - else if (operator.getType() == SubQueryType.NOT_IN) { - if(hasCorrelation) { - return true; - } } + } + return false; } @@ -716,7 +684,7 @@ void validateAndRewriteAST(RowResolver outerQueryRR, ASTNode selectItem = (ASTNode) selectClause.getChild(i); int r = SubQueryUtils.checkAggOrWindowing(selectItem); - containsWindowing = containsWindowing | ( r == 3); + containsWindowing = containsWindowing | ( r == 2); containsAggregationExprs = containsAggregationExprs | ( r == 1 ); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java index 2b327db..86b6a6e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java @@ -27,7 +27,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.ReplChangeManager; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.NotificationEvent; @@ -142,24 +141,22 @@ public String toString(){ private final Path dumpRoot; private final Path dumpFile; - private Path cmRoot; public DumpMetaData(Path dumpRoot) { this.dumpRoot = dumpRoot; dumpFile = new Path(dumpRoot, DUMPMETADATA); } - public DumpMetaData(Path dumpRoot, DUMPTYPE lvl, Long eventFrom, Long eventTo, Path cmRoot){ + public DumpMetaData(Path dumpRoot, DUMPTYPE lvl, Long eventFrom, Long eventTo){ this(dumpRoot); - setDump(lvl, eventFrom, eventTo, cmRoot); + setDump(lvl, eventFrom, eventTo); } - public void setDump(DUMPTYPE lvl, Long eventFrom, Long eventTo, Path cmRoot){ + public void setDump(DUMPTYPE lvl, Long 
eventFrom, Long eventTo){ this.dumpType = lvl; this.eventFrom = eventFrom; this.eventTo = eventTo; this.initialized = true; - this.cmRoot = cmRoot; } public void loadDumpFromFile() throws SemanticException { @@ -169,11 +166,9 @@ public void loadDumpFromFile() throws SemanticException { BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(dumpFile))); String line = null; if ( (line = br.readLine()) != null){ - String[] lineContents = line.split("\t", 5); - setDump(DUMPTYPE.valueOf(lineContents[0]), Long.valueOf(lineContents[1]), Long.valueOf(lineContents[2]), - new Path(lineContents[3])); - setPayload(lineContents[4].equals(Utilities.nullStringOutput) ? null : lineContents[4]); - ReplChangeManager.setCmRoot(cmRoot); + String[] lineContents = line.split("\t", 4); + setDump(DUMPTYPE.valueOf(lineContents[0]), Long.valueOf(lineContents[1]), Long.valueOf(lineContents[2])); + setPayload(lineContents[3].equals(Utilities.nullStringOutput) ? null : lineContents[3]); } else { throw new IOException("Unable to read valid values from dumpFile:"+dumpFile.toUri().toString()); } @@ -206,14 +201,6 @@ public Long getEventTo() throws SemanticException { return eventTo; } - public Path getCmRoot() { - return cmRoot; - } - - public void setCmRoot(Path cmRoot) { - this.cmRoot = cmRoot; - } - public Path getDumpFilePath() { return dumpFile; } @@ -230,8 +217,7 @@ private void initializeIfNot() throws SemanticException { } public void write() throws SemanticException { - writeOutput(Arrays.asList(dumpType.toString(), eventFrom.toString(), eventTo.toString(), - cmRoot.toString(), payload), dumpFile); + writeOutput(Arrays.asList(dumpType.toString(), eventFrom.toString(), eventTo.toString(), payload), dumpFile); } } @@ -315,7 +301,6 @@ private void analyzeReplDump(ASTNode ast) throws SemanticException { String replRoot = conf.getVar(HiveConf.ConfVars.REPLDIR); Path dumpRoot = new Path(replRoot, getNextDumpDir()); DumpMetaData dmd = new DumpMetaData(dumpRoot); - Path cmRoot = new Path(conf.getVar(HiveConf.ConfVars.REPLCMDIR)); Long lastReplId; try { if (eventFrom == null){ @@ -355,7 +340,7 @@ private void analyzeReplDump(ASTNode ast) throws SemanticException { LOG.info( "Consolidation done, preparing to return {},{}->{}", dumpRoot.toUri(), bootDumpBeginReplId, bootDumpEndReplId); - dmd.setDump(DUMPTYPE.BOOTSTRAP, bootDumpBeginReplId, bootDumpEndReplId, cmRoot); + dmd.setDump(DUMPTYPE.BOOTSTRAP, bootDumpBeginReplId, bootDumpEndReplId); dmd.write(); // Set the correct last repl id to return to the user @@ -390,14 +375,14 @@ private void analyzeReplDump(ASTNode ast) throws SemanticException { while (evIter.hasNext()){ NotificationEvent ev = evIter.next(); Path evRoot = new Path(dumpRoot, String.valueOf(ev.getEventId())); - dumpEvent(ev, evRoot, cmRoot); + dumpEvent(ev, evRoot); } LOG.info("Done dumping events, preparing to return {},{}", dumpRoot.toUri(), eventTo); writeOutput( Arrays.asList("incremental", String.valueOf(eventFrom), String.valueOf(eventTo)), dmd.getDumpFilePath()); - dmd.setDump(DUMPTYPE.INCREMENTAL, eventFrom, eventTo, cmRoot); + dmd.setDump(DUMPTYPE.INCREMENTAL, eventFrom, eventTo); dmd.write(); // Set the correct last repl id to return to the user lastReplId = eventTo; @@ -411,7 +396,7 @@ private void analyzeReplDump(ASTNode ast) throws SemanticException { } } - private void dumpEvent(NotificationEvent ev, Path evRoot, Path cmRoot) throws Exception { + private void dumpEvent(NotificationEvent ev, Path evRoot) throws Exception { long evid = ev.getEventId(); String evidStr = 
String.valueOf(evid); ReplicationSpec replicationSpec = getNewEventOnlyReplicationSpec(evidStr); @@ -454,7 +439,7 @@ private void dumpEvent(NotificationEvent ev, Path evRoot, Path cmRoot) throws Ex } } - (new DumpMetaData(evRoot, DUMPTYPE.EVENT_CREATE_TABLE, evid, evid, cmRoot)).write(); + (new DumpMetaData(evRoot, DUMPTYPE.EVENT_CREATE_TABLE, evid, evid)).write(); break; } case MessageFactory.ADD_PARTITION_EVENT : { @@ -519,19 +504,19 @@ public Partition apply(@Nullable org.apache.hadoop.hive.metastore.api.Partition } } - (new DumpMetaData(evRoot, DUMPTYPE.EVENT_ADD_PARTITION, evid, evid, cmRoot)).write(); + (new DumpMetaData(evRoot, DUMPTYPE.EVENT_ADD_PARTITION, evid, evid)).write(); break; } case MessageFactory.DROP_TABLE_EVENT : { LOG.info("Processing#{} DROP_TABLE message : {}", ev.getEventId(), ev.getMessage()); - DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_DROP_TABLE, evid, evid, cmRoot); + DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_DROP_TABLE, evid, evid); dmd.setPayload(ev.getMessage()); dmd.write(); break; } case MessageFactory.DROP_PARTITION_EVENT : { LOG.info("Processing#{} DROP_PARTITION message : {}", ev.getEventId(), ev.getMessage()); - DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_DROP_PARTITION, evid, evid, cmRoot); + DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_DROP_PARTITION, evid, evid); dmd.setPayload(ev.getMessage()); dmd.write(); break; @@ -555,12 +540,12 @@ public Partition apply(@Nullable org.apache.hadoop.hive.metastore.api.Partition null, replicationSpec); - DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_ALTER_TABLE, evid, evid, cmRoot); + DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_ALTER_TABLE, evid, evid); dmd.setPayload(ev.getMessage()); dmd.write(); } else { // rename scenario - DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_RENAME_TABLE, evid, evid, cmRoot); + DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_RENAME_TABLE, evid, evid); dmd.setPayload(ev.getMessage()); dmd.write(); } @@ -597,13 +582,13 @@ public Partition apply(@Nullable org.apache.hadoop.hive.metastore.api.Partition qlMdTable, qlPtns, replicationSpec); - DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_ALTER_PARTITION, evid, evid, cmRoot); + DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_ALTER_PARTITION, evid, evid); dmd.setPayload(ev.getMessage()); dmd.write(); break; } else { // rename scenario - DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_RENAME_PARTITION, evid, evid, cmRoot); + DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_RENAME_PARTITION, evid, evid); dmd.setPayload(ev.getMessage()); dmd.write(); break; @@ -641,7 +626,7 @@ public Partition apply(@Nullable org.apache.hadoop.hive.metastore.api.Partition } LOG.info("Processing#{} INSERT message : {}", ev.getEventId(), ev.getMessage()); - DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_INSERT, evid, evid, cmRoot); + DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_INSERT, evid, evid); dmd.setPayload(ev.getMessage()); dmd.write(); break; @@ -649,7 +634,7 @@ public Partition apply(@Nullable org.apache.hadoop.hive.metastore.api.Partition // TODO : handle other event types default: LOG.info("Dummy processing#{} message : {}", ev.getEventId(), ev.getMessage()); - DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_UNKNOWN, evid, evid, cmRoot); + DumpMetaData dmd = new DumpMetaData(evRoot, DUMPTYPE.EVENT_UNKNOWN, evid, evid); dmd.setPayload(ev.getMessage()); 
dmd.write(); break; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 248dd63..2446162 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -859,7 +859,7 @@ private static void writeAsText(String text, FSDataOutputStream out) throws IOEx } /** - * Generate a temp table out of a values clause + * Generate a temp table out of a value clause * See also {@link #preProcessForInsert(ASTNode, QB)} */ private ASTNode genValuesTempTable(ASTNode originalFrom, QB qb) throws SemanticException { @@ -2005,10 +2005,6 @@ private void getMetaData(QB qb, ReadEntity parentInput) replaceViewReferenceWithDefinition(qb, tab, tabName, alias); // This is the last time we'll see the Table objects for views, so add it to the inputs // now. isInsideView will tell if this view is embedded in another view. - // If the view is Inside another view, it should have at least one parent - if (qb.isInsideView() && parentInput == null) { - parentInput = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput); - } ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView()); viewInput = PlanUtils.addInput(inputs, viewInput); aliasToViewInfo.put(alias, new ObjectPair(fullViewName, viewInput)); @@ -6775,7 +6771,10 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) // This is a non-native table. // We need to set stats as inaccurate. setStatsForNonNativeTable(dest_tab); - createInsertDesc(dest_tab, !qb.getParseInfo().isInsertIntoTable(dest_tab.getTableName())); + // true if it is insert overwrite. + boolean overwrite = !qb.getParseInfo().isInsertIntoTable( + String.format("%s.%s", dest_tab.getDbName(), dest_tab.getTableName())); + createInsertDesc(dest_tab, overwrite); } WriteEntity output = null; @@ -7197,7 +7196,10 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) private void createInsertDesc(Table table, boolean overwrite) { Task[] tasks = new Task[this.rootTasks.size()]; tasks = this.rootTasks.toArray(tasks); - InsertTableDesc insertTableDesc = new InsertTableDesc(table.getTTable(), overwrite); + PreInsertTableDesc preInsertTableDesc = new PreInsertTableDesc(table, overwrite); + InsertTableDesc insertTableDesc = new InsertTableDesc(table, overwrite); + this.rootTasks + .add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), preInsertTableDesc), conf)); TaskFactory .getAndMakeChild(new DDLWork(getInputs(), getOutputs(), insertTableDesc), conf, tasks); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java index f0165dd..bd771f9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java @@ -32,8 +32,6 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType; import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryTypeDef; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver; public class SubQueryUtils { @@ -254,8 +252,7 @@ static SubQueryTypeDef buildSQOperator(ASTNode astSQOp) throws SemanticException * @return * 0 if implies neither * 1 if implies aggregation - * 2 if implies count - * 3 if implies windowing + * 2 if implies windowing */ static int 
checkAggOrWindowing(ASTNode expressionTree) throws SemanticException { int exprTokenType = expressionTree.getToken().getType(); @@ -265,18 +262,12 @@ static int checkAggOrWindowing(ASTNode expressionTree) throws SemanticException assert (expressionTree.getChildCount() != 0); if (expressionTree.getChild(expressionTree.getChildCount()-1).getType() == HiveParser.TOK_WINDOWSPEC) { - return 3; + return 2; } if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { String functionName = SemanticAnalyzer.unescapeIdentifier(expressionTree.getChild(0) .getText()); - GenericUDAFResolver udafResolver = FunctionRegistry.getGenericUDAFResolver(functionName); - if (udafResolver != null) { - // we need to know if it is COUNT since this is specialized for IN/NOT IN - // corr subqueries. - if(udafResolver instanceof GenericUDAFCount) { - return 2; - } + if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) { return 1; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 47b229f..cf8e843 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -571,52 +571,53 @@ protected void optimizeTaskPlan(List> rootTasks, Pa return; } - private static class SMBJoinOpProcContext implements NodeProcessorCtx { - HashMap JoinOpToTsOpMap = new HashMap(); + private static class SemijoinRemovalContext implements NodeProcessorCtx { + List> parents = new ArrayList>(); } - private static class SMBJoinOpProc implements NodeProcessor { + private static class SemijoinRemovalProc implements NodeProcessor { @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - SMBJoinOpProcContext ctx = (SMBJoinOpProcContext) procCtx; - ctx.JoinOpToTsOpMap.put((CommonMergeJoinOperator) nd, - (TableScanOperator) stack.get(0)); + SemijoinRemovalContext ctx = (SemijoinRemovalContext) procCtx; + Operator parent = (Operator) stack.get(stack.size() - 2); + ctx.parents.add(parent); return null; } } - private static void removeSemijoinOptimizationFromSMBJoins( - OptimizeTezProcContext procCtx) throws SemanticException { - if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) || - procCtx.parseContext.getRsOpToTsOpMap().size() == 0) { - return; - } - + private static void collectSemijoinOps(Operator ts, NodeProcessorCtx ctx) throws SemanticException { + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. The dispatcher + // generates the plan from the operator tree Map opRules = new LinkedHashMap(); - opRules.put( - new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%" + - ".*" + TezDummyStoreOperator.getOperatorName() + "%" + + opRules.put(new RuleRegExp("R1", SelectOperator.getOperatorName() + "%" + + TezDummyStoreOperator.getOperatorName() + "%"), + new SemijoinRemovalProc()); + opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + "%" + CommonMergeJoinOperator.getOperatorName() + "%"), - new SMBJoinOpProc()); - - SMBJoinOpProcContext ctx = new SMBJoinOpProcContext(); - // The dispatcher finds SMB and if there is semijoin optimization before it, removes it. 
+ new SemijoinRemovalProc()); Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx); - List topNodes = new ArrayList(); - topNodes.addAll(procCtx.parseContext.getTopOps().values()); GraphWalker ogw = new PreOrderOnceWalker(disp); - ogw.startWalking(topNodes, null); + List startNodes = new ArrayList(); + startNodes.add(ts); + + HashMap outputMap = new HashMap(); + ogw.startWalking(startNodes, null); + } + + private static class SMBJoinOpProc implements NodeProcessor { - // Iterate over the map and remove semijoin optimizations if needed. - for (CommonMergeJoinOperator joinOp : ctx.JoinOpToTsOpMap.keySet()) { + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { List tsOps = new ArrayList(); // Get one top level TS Op directly from the stack - tsOps.add(ctx.JoinOpToTsOpMap.get(joinOp)); + tsOps.add((TableScanOperator)stack.get(0)); // Get the other one by examining Join Op - List> parents = joinOp.getParentOperators(); + List> parents = ((CommonMergeJoinOperator) nd).getParentOperators(); for (Operator parent : parents) { if (parent instanceof TezDummyStoreOperator) { // already accounted for @@ -635,7 +636,7 @@ private static void removeSemijoinOptimizationFromSMBJoins( // Now the relevant TableScanOperators are known, find if there exists // a semijoin filter on any of them, if so, remove it. - ParseContext pctx = procCtx.parseContext; + ParseContext pctx = ((OptimizeTezProcContext) procCtx).parseContext; for (TableScanOperator ts : tsOps) { for (ReduceSinkOperator rs : pctx.getRsOpToTsOpMap().keySet()) { if (ts == pctx.getRsOpToTsOpMap().get(rs)) { @@ -645,27 +646,11 @@ private static void removeSemijoinOptimizationFromSMBJoins( } } } - } - } - - private static class SemiJoinCycleRemovalDueTOMapsideJoinContext implements NodeProcessorCtx { - HashMap,Operator> childParentMap = new HashMap,Operator>(); - } - - private static class SemiJoinCycleRemovalDueToMapsideJoins implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - - SemiJoinCycleRemovalDueTOMapsideJoinContext ctx = - (SemiJoinCycleRemovalDueTOMapsideJoinContext) procCtx; - ctx.childParentMap.put((Operator)stack.get(stack.size() - 2), (Operator) nd); return null; } } - private static void removeSemiJoinCyclesDueToMapsideJoins( + private static void removeSemijoinOptimizationFromSMBJoins( OptimizeTezProcContext procCtx) throws SemanticException { if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) || procCtx.parseContext.getRsOpToTsOpMap().size() == 0) { @@ -674,37 +659,31 @@ private static void removeSemiJoinCyclesDueToMapsideJoins( Map opRules = new LinkedHashMap(); opRules.put( - new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%" + - MapJoinOperator.getOperatorName() + "%"), - new SemiJoinCycleRemovalDueToMapsideJoins()); - opRules.put( - new RuleRegExp("R2", MapJoinOperator.getOperatorName() + "%" + - CommonMergeJoinOperator.getOperatorName() + "%"), - new SemiJoinCycleRemovalDueToMapsideJoins()); - opRules.put( - new RuleRegExp("R3", CommonMergeJoinOperator.getOperatorName() + "%" + - MapJoinOperator.getOperatorName() + "%"), - new SemiJoinCycleRemovalDueToMapsideJoins()); - opRules.put( - new RuleRegExp("R4", CommonMergeJoinOperator.getOperatorName() + "%" + + new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%" + + ".*" + TezDummyStoreOperator.getOperatorName() + "%" + CommonMergeJoinOperator.getOperatorName() + "%"), - new SemiJoinCycleRemovalDueToMapsideJoins()); + new SMBJoinOpProc()); - SemiJoinCycleRemovalDueTOMapsideJoinContext ctx = - new SemiJoinCycleRemovalDueTOMapsideJoinContext(); - Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx); + // The dispatcher finds SMB and if there is semijoin optimization before it, removes it. + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx); List topNodes = new ArrayList(); topNodes.addAll(procCtx.parseContext.getTopOps().values()); GraphWalker ogw = new PreOrderOnceWalker(disp); ogw.startWalking(topNodes, null); + } - // process the list - ParseContext pCtx = procCtx.parseContext; - for (Operator parentJoin : ctx.childParentMap.keySet()) { - Operator childJoin = ctx.childParentMap.get(parentJoin); + private static class SemiJoinCycleRemovalDueToMapsideJoins implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... 
nodeOutputs) throws SemanticException { + ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext; + Operator childJoin = ((Operator) nd); + Operator parentJoin = ((Operator) stack.get(stack.size() - 2)); if (parentJoin.getChildOperators().size() == 1) { - continue; + // Nothing to do here + return null; } for (Operator child : parentJoin.getChildOperators()) { @@ -744,7 +723,40 @@ private static void removeSemiJoinCyclesDueToMapsideJoins( } } } + return null; + } + } + + private static void removeSemiJoinCyclesDueToMapsideJoins( + OptimizeTezProcContext procCtx) throws SemanticException { + if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) || + procCtx.parseContext.getRsOpToTsOpMap().size() == 0) { + return; } + + Map opRules = new LinkedHashMap(); + opRules.put( + new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%" + + MapJoinOperator.getOperatorName() + "%"), + new SemiJoinCycleRemovalDueToMapsideJoins()); + opRules.put( + new RuleRegExp("R2", MapJoinOperator.getOperatorName() + "%" + + CommonMergeJoinOperator.getOperatorName() + "%"), + new SemiJoinCycleRemovalDueToMapsideJoins()); + opRules.put( + new RuleRegExp("R3", CommonMergeJoinOperator.getOperatorName() + "%" + + MapJoinOperator.getOperatorName() + "%"), + new SemiJoinCycleRemovalDueToMapsideJoins()); + opRules.put( + new RuleRegExp("R4", CommonMergeJoinOperator.getOperatorName() + "%" + + CommonMergeJoinOperator.getOperatorName() + "%"), + new SemiJoinCycleRemovalDueToMapsideJoins()); + + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx); + List topNodes = new ArrayList(); + topNodes.addAll(procCtx.parseContext.getTopOps().values()); + GraphWalker ogw = new PreOrderOnceWalker(disp); + ogw.startWalking(topNodes, null); } private static class SemiJoinRemovalIfNoStatsProc implements NodeProcessor { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index f979c14..4aff56b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -54,7 +54,6 @@ import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.lib.ExpressionWalker; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; @@ -1401,9 +1400,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ASTNode sqNode = (ASTNode) expr.getParent().getChild(1); if (!ctx.getallowSubQueryExpr()) - throw new CalciteSubquerySemanticException(SemanticAnalyzer.generateErrorMessage(sqNode, - ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg("Currently SubQuery expressions are only allowed as " + - "Where and Having Clause predicates"))); + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(sqNode, + ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg())); ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); if (desc != null) { @@ -1429,7 +1427,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Map subqueryToRelNode = ctx.getSubqueryToRelNode(); if(subqueryToRelNode == null) { - throw new 
CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( " Currently SubQuery expressions are only allowed as " + "Where and Having Clause predicates")); } @@ -1451,7 +1449,7 @@ else if(isIN) { else if(isScalar){ // only single subquery expr is supported if(subqueryRel.getRowType().getFieldCount() != 1) { - throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( "More than one column expression in subquery")); } // figure out subquery expression column's type diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java index f102786..79355ba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -29,12 +29,9 @@ import java.util.Set; import org.antlr.runtime.TokenRewriteStream; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; @@ -130,19 +127,16 @@ private void addPartitionColsToInsert(List partCols, StringBuilder /** * Append list of partition columns to Insert statement, i.e. the 2nd set of partCol1,partCol2 * INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2... - * @param target target table + * @param targetName simple target table name (i.e. name or alias) */ - private void addPartitionColsToSelect(List partCols, StringBuilder rewrittenQueryStr, - ASTNode target) throws SemanticException { - String targetName = target != null ? getSimpleTableName(target) : null; - + private void addPartitionColsToSelect(List partCols, StringBuilder rewrittenQueryStr, String targetName) { // If the table is partitioned, we need to select the partition columns as well. 
if (partCols != null) { for (FieldSchema fschema : partCols) { rewrittenQueryStr.append(", "); //would be nice if there was a way to determine if quotes are needed if(targetName != null) { - rewrittenQueryStr.append(targetName).append('.'); + rewrittenQueryStr.append(HiveUtils.unparseIdentifier(targetName, this.conf)).append('.'); } rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf)); } @@ -696,15 +690,13 @@ WHEN NOT MATCHED THEN INSERT VALUES(source.a2, source.b2) if(numWhenMatchedDeleteClauses + numWhenMatchedUpdateClauses == 2 && extraPredicate == null) { throw new SemanticException(ErrorMsg.MERGE_PREDIACTE_REQUIRED, ctx.getCmd()); } - handleCardinalityViolation(rewrittenQueryStr, target, onClauseAsText, targetTable); + ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd()); Context rewrittenCtx = rr.rewrittenCtx; ASTNode rewrittenTree = rr.rewrittenTree; //set dest name mapping on new context - for(int insClauseIdx = 1, whenClauseIdx = 0; - insClauseIdx < rewrittenTree.getChildCount() - 1/*skip cardinality violation clause*/; - insClauseIdx++, whenClauseIdx++) { + for(int insClauseIdx = 1, whenClauseIdx = 0; insClauseIdx < rewrittenTree.getChildCount(); insClauseIdx++, whenClauseIdx++) { //we've added Insert clauses in order or WHEN items in whenClauses ASTNode insertClause = (ASTNode) rewrittenTree.getChild(insClauseIdx); switch (getWhenClauseOperation(whenClauses.get(whenClauseIdx)).getType()) { @@ -818,61 +810,6 @@ private boolean isTargetTable(Entity entity, Table targetTable) { */ return targetTable.equals(entity.getTable()); } - - /** - * Per SQL Spec ISO/IEC 9075-2:2011(E) Section 14.2 under "General Rules" Item 6/Subitem a/Subitem 2/Subitem B, - * an error should be raised if > 1 row of "source" matches the same row in "target". - * This should not affect the runtime of the query as it's running in parallel with other - * branches of the multi-insert. It won't actually write any data to merge_tmp_table since the - * cardinality_violation() UDF throws an error whenever it's called killing the query - */ - private void handleCardinalityViolation(StringBuilder rewrittenQueryStr, ASTNode target, - String onClauseAsString, Table targetTable) - throws SemanticException { - if(!conf.getBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK)) { - LOG.info("Merge statement cardinality violation check is disabled: " + - HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK.varname); - return; - } - //this is a tmp table and thus Session scoped and acid requires SQL statement to be serial in a - // given session, i.e. the name can be fixed across all invocations - String tableName = "merge_tmp_table"; - rewrittenQueryStr.append("\nINSERT INTO ").append(tableName) - .append("\n SELECT cardinality_violation(") - .append(getSimpleTableName(target)).append(".ROW__ID"); - addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target); - - rewrittenQueryStr.append(")\n WHERE ").append(onClauseAsString) - .append(" GROUP BY ").append(getSimpleTableName(target)).append(".ROW__ID"); - - addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target); - - rewrittenQueryStr.append(" HAVING count(*) > 1"); - //say table T has partiton p, we are generating - //select cardinality_violation(ROW_ID, p) WHERE ... 
GROUP BY ROW__ID, p - //the Group By args are passed to cardinality_violation to add the violating value to the error msg - try { - if (null == db.getTable(tableName, false)) { - StorageFormat format = new StorageFormat(conf); - format.processStorageFormat("TextFile"); - Table table = db.newTable(tableName); - table.setSerializationLib(format.getSerde()); - List fields = new ArrayList(); - fields.add(new FieldSchema("val", "int", null)); - table.setFields(fields); - table.setDataLocation(Warehouse.getDnsPath(new Path(SessionState.get().getTempTableSpace(), - tableName), conf)); - table.getTTable().setTemporary(true); - table.setStoredAsSubDirectories(false); - table.setInputFormatClass(format.getInputFormat()); - table.setOutputFormatClass(format.getOutputFormat()); - db.createTable(table, true); - } - } - catch(HiveException|MetaException e) { - throw new SemanticException(e.getMessage(), e); - } - } /** * @param onClauseAsString - because there is no clone() and we need to use in multiple places * @param deleteExtraPredicate - see notes at caller @@ -912,7 +849,7 @@ private String handleUpdate(ASTNode whenMatchedUpdateClause, StringBuilder rewri rewrittenQueryStr.append(getSimpleTableName(target)).append(".").append(HiveUtils.unparseIdentifier(name, this.conf)); } } - addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target); + addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, targetName); rewrittenQueryStr.append("\n WHERE ").append(onClauseAsString); String extraPredicate = getWhenClausePredicate(whenMatchedUpdateClause); if(extraPredicate != null) { @@ -946,7 +883,7 @@ private String handleDelete(ASTNode whenMatchedDeleteClause, StringBuilder rewri addPartitionColsToInsert(partCols, rewrittenQueryStr); rewrittenQueryStr.append(" -- delete clause\n select ").append(targetName).append(".ROW__ID "); - addPartitionColsToSelect(partCols, rewrittenQueryStr, target); + addPartitionColsToSelect(partCols, rewrittenQueryStr, targetName); rewrittenQueryStr.append("\n WHERE ").append(onClauseAsString); String extraPredicate = getWhenClausePredicate(whenMatchedDeleteClause); if(extraPredicate != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java index c4efb3f..2b9e897 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.parse.AlterTablePartMergeFilesDesc; +import org.apache.hadoop.hive.ql.parse.PreInsertTableDesc; import org.apache.hadoop.hive.ql.plan.Explain.Level; /** @@ -32,6 +33,7 @@ public class DDLWork implements Serializable { private static final long serialVersionUID = 1L; + private PreInsertTableDesc preInsertTableDesc; private InsertTableDesc insertTableDesc; private CreateIndexDesc createIndexDesc; private AlterIndexDesc alterIndexDesc; @@ -532,6 +534,12 @@ public DDLWork(HashSet inputs, HashSet outputs, this.insertTableDesc = insertTableDesc; } + public DDLWork(HashSet inputs, HashSet outputs, + PreInsertTableDesc preInsertTableDesc) { + this(inputs, outputs); + this.preInsertTableDesc = preInsertTableDesc; + } + /** * @return Create Database descriptor */ @@ -1202,4 +1210,13 @@ public InsertTableDesc getInsertTableDesc() { public void setInsertTableDesc(InsertTableDesc insertTableDesc) { this.insertTableDesc = insertTableDesc; } + + 
@Explain(displayName = "Pre Insert operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + public PreInsertTableDesc getPreInsertTableDesc() { + return preInsertTableDesc; + } + + public void setPreInsertTableDesc(PreInsertTableDesc preInsertTableDesc) { + this.preInsertTableDesc = preInsertTableDesc; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/InsertTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/InsertTableDesc.java index 1397b8a..212bc7a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/InsertTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/InsertTableDesc.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hive.ql.plan; -import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.metadata.Table; @Explain(displayName = "Insert", explainLevels = { Explain.Level.USER, Explain.Level.DEFAULT, Explain.Level.EXTENDED }) public class InsertTableDesc extends DDLDesc { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index 05d2c81..b2c5865 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -1127,8 +1127,7 @@ public static ReadEntity getParentViewInfo(String alias_id, // For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T // -> implies depends on. // T's parent would be V1 - // do not check last alias in the array for parent can not be itself. - for (int pos = 0; pos < aliases.length -1; pos++) { + for (int pos = 0; pos < aliases.length; pos++) { currentAlias = currentAlias == null ? aliases[pos] : currentAlias + ":" + aliases[pos]; currentAlias = currentAlias.replace(SemanticAnalyzer.SUBQUERY_TAG_1, "") diff --git ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java index 18b0e1c..e7bbd54 100644 --- ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java +++ ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java @@ -161,12 +161,9 @@ public HivePrivilegeObjectType getObjectType() { op2Priv.put(HiveOperationType.DESCFUNCTION, PrivRequirement.newIOPrivRequirement (null, null)); - // meta store check command - equivalent to add partition command - // no input objects are passed to it currently, but keeping admin priv - // requirement on inputs just in case some input object like file - // uri is added later + // meta store check command - require admin priv op2Priv.put(HiveOperationType.MSCK, PrivRequirement.newIOPrivRequirement -(ADMIN_PRIV_AR, INS_NOGRANT_AR)); +(ADMIN_PRIV_AR, null)); //alter table commands require table ownership diff --git ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index d607f61..453e0a5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -1099,12 +1099,6 @@ public void printInfo(String info, String detail) { LOG.info(info + StringUtils.defaultString(detail)); } - public void printInfoNoLog(String info) { - if (!getIsSilent()) { - getInfoStream().println(info); - } - } - public void printError(String error) { printError(error, null); } diff --git 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java index deb0f76..fb9a140 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java @@ -24,7 +24,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.*; @@ -73,8 +72,6 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticE // Bloom filter rest private ByteArrayOutputStream result = new ByteArrayOutputStream(); - private transient byte[] scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES]; - @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { super.init(m, parameters); @@ -170,10 +167,9 @@ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveExcep bf.addDouble(vDouble); break; case DECIMAL: - HiveDecimalWritable vDecimal = ((HiveDecimalObjectInspector)inputOI). - getPrimitiveWritableObject(parameters[0]); - int startIdx = vDecimal.toBytes(scratchBuffer); - bf.addBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx); + HiveDecimal vDecimal = ((HiveDecimalObjectInspector)inputOI). + getPrimitiveJavaObject(parameters[0]); + bf.addString(vDecimal.toString()); break; case DATE: DateWritable vDate = ((DateObjectInspector)inputOI). diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java deleted file mode 100644 index 0724ff4e..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.udf.generic; - -import java.util.ArrayList; - -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDFArgumentException; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector; -import org.apache.logging.log4j.core.layout.StringBuilderEncoder; - -/** - * GenericUDFArray. - * - */ -@Description(name = "cardinality_violation", - value = "_FUNC_(n0, n1...) - raises Cardinality Violation") -public class GenericUDFCardinalityViolation extends GenericUDF { - private transient Converter[] converters; - private transient ArrayList ret = new ArrayList(); - - @Override - public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - return PrimitiveObjectInspectorFactory.javaIntObjectInspector; - } - - @Override - public Object evaluate(DeferredObject[] arguments) throws HiveException { - StringBuilder nonUniqueKey = new StringBuilder(); - for(DeferredObject t : arguments) { - if(nonUniqueKey.length() > 0) {nonUniqueKey.append(','); } - nonUniqueKey.append(t.get()); - } - throw new RuntimeException("Cardinality Violation in Merge statement: " + nonUniqueKey); - } - - @Override - public String getDisplayString(String[] children) { - return getStandardDisplayString("cardinality_violation", children, ","); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInBloomFilter.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInBloomFilter.java index 3e6e069..1b7de6c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInBloomFilter.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInBloomFilter.java @@ -22,11 +22,8 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorInBloomFilterColDynamicValue; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.*; @@ -44,7 +41,6 @@ /** * GenericUDF to lookup a value in BloomFilter */ -@VectorizedExpressions({VectorInBloomFilterColDynamicValue.class}) public class GenericUDFInBloomFilter extends GenericUDF { private static final Logger LOG = LoggerFactory.getLogger(GenericUDFInBloomFilter.class); @@ -52,7 +48,6 @@ private transient ObjectInspector bloomFilterObjectInspector; private transient BloomFilter bloomFilter; private transient boolean initializedBloomFilter; - private transient byte[] scratchBuffer = new 
byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES]; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -138,10 +133,9 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { get(arguments[0].get()); return bloomFilter.testDouble(vDouble); case DECIMAL: - HiveDecimalWritable vDecimal = ((HiveDecimalObjectInspector) valObjectInspector). - getPrimitiveWritableObject(arguments[0].get()); - int startIdx = vDecimal.toBytes(scratchBuffer); - return bloomFilter.testBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx); + HiveDecimal vDecimal = ((HiveDecimalObjectInspector) valObjectInspector). + getPrimitiveJavaObject(arguments[0].get()); + return bloomFilter.testString(vDecimal.toString()); case DATE: DateWritable vDate = ((DateObjectInspector) valObjectInspector). getPrimitiveWritableObject(arguments[0].get()); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualNS.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualNS.java index 7574d2c..3707a33 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualNS.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualNS.java @@ -40,9 +40,4 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { } return super.evaluate(arguments); } - - @Override - public GenericUDF negative() { - return new GenericUDFOPNotEqualNS(); - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqualNS.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqualNS.java deleted file mode 100644 index 4e89423..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqualNS.java +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.udf.generic; - -import org.apache.hadoop.hive.ql.metadata.HiveException; - -// this function is for internal use only -public class GenericUDFOPNotEqualNS extends GenericUDFOPNotEqual { - - @Override - public Object evaluate(DeferredObject[] arguments) throws HiveException { - Object o0 = arguments[0].get(); - Object o1 = arguments[1].get(); - if (o0 == null && o1 == null) { - result.set(false); - return result; - } - if (o0 == null || o1 == null) { - result.set(true); - return result; - } - return super.evaluate(arguments); - } - - @Override - public GenericUDF negative() { - return new GenericUDFOPEqualNS(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSQCountCheck.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSQCountCheck.java index 89fa0de..53e6231 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSQCountCheck.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSQCountCheck.java @@ -46,9 +46,9 @@ @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length > 2) { + if (arguments.length != 1) { throw new UDFArgumentLengthException( - "Invalid scalar subquery expression. Subquery count check expected two argument but received: " + arguments.length); + "Invalid scalar subquery expression. Subquery count check expected one argument but received: " + arguments.length); } converters[0] = ObjectInspectorConverters.getConverter(arguments[0], @@ -63,23 +63,11 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen public Object evaluate(DeferredObject[] arguments) throws HiveException { Object valObject = arguments[0].get(); assert(valObject != null); - Long val = getLongValue(arguments, 0, converters); assert(val >= 0); - - switch (arguments.length){ - case 1: //Scalar queries, should expect value/count less than 1 - if (val > 1) { - throw new UDFArgumentException( - " Scalar subquery expression returns more than one row."); - } - break; - case 2: - if (val == 0) { // IN/NOT IN subqueries with aggregate - throw new UDFArgumentException( - " IN/NOT IN subquery with aggregate returning zero result. 
Currently this is not supported."); - } - break; + if(val > 1) { + throw new UDFArgumentException( + " Scalar subquery expression returns more than one row."); } resultLong.set(val); diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index a90dd35..9e2179c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -80,7 +80,6 @@ private Driver d; private static enum Table { ACIDTBL("acidTbl"), - ACIDTBLPART("acidTblPart"), ACIDTBL2("acidTbl2"), NONACIDORCTBL("nonAcidOrcTbl"), NONACIDORCTBL2("nonAcidOrcTbl2"); @@ -107,7 +106,6 @@ public void setUp() throws Exception { hiveConf .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); - hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true); TxnDbUtil.setConfValues(hiveConf); TxnDbUtil.prepDb(); File f = new File(TEST_WAREHOUSE_DIR); @@ -122,7 +120,6 @@ public void setUp() throws Exception { d.setMaxRows(10000); dropTables(); runStatementOnDriver("create table " + Table.ACIDTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); - runStatementOnDriver("create table " + Table.ACIDTBLPART + "(a int, b int) partitioned by (p string) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); runStatementOnDriver("create table " + Table.NONACIDORCTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); runStatementOnDriver("create table " + Table.NONACIDORCTBL2 + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); runStatementOnDriver("create temporary table " + Table.ACIDTBL2 + "(a int, b int, c int) clustered by (c) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); @@ -762,11 +759,10 @@ public void testMergeOnTezEdges() throws Exception { LOG.info("Explain1: " + sb); for(int i = 0; i < explain.size(); i++) { if(explain.get(i).contains("Edges:")) { - Assert.assertTrue("At i+1=" + (i+1) + explain.get(i + 1), explain.get(i + 1).contains("Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)")); - Assert.assertTrue("At i+1=" + (i+2) + explain.get(i + 2), explain.get(i + 2).contains("Reducer 3 <- Reducer 2 (SIMPLE_EDGE)")); - Assert.assertTrue("At i+1=" + (i+3) + explain.get(i + 3), explain.get(i + 3).contains("Reducer 4 <- Reducer 2 (SIMPLE_EDGE)")); - Assert.assertTrue("At i+1=" + (i+4) + explain.get(i + 4), explain.get(i + 4).contains("Reducer 5 <- Reducer 2 (SIMPLE_EDGE)")); - Assert.assertTrue("At i+1=" + (i+5) + explain.get(i + 5), explain.get(i + 5).contains("Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)")); + Assert.assertTrue(explain.get(i + 1).contains("Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)")); + Assert.assertTrue(explain.get(i + 2).contains("Reducer 3 <- Reducer 2 (SIMPLE_EDGE)")); + Assert.assertTrue(explain.get(i + 3).contains("Reducer 4 <- Reducer 2 (SIMPLE_EDGE)")); + Assert.assertTrue(explain.get(i + 4).contains("Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)")); break; } } @@ -805,31 +801,6 @@ public void testMergeDeleteUpdate() throws Exception { int[][] rExpected = {{5,6},{7,8},{11,11}}; Assert.assertEquals(stringifyValues(rExpected), r); } - - /** - * 
see https://issues.apache.org/jira/browse/HIVE-14949 for details - * @throws Exception - */ - @Test - public void testMergeCardinalityViolation() throws Exception { - int[][] sourceVals = {{2,2},{2,44},{5,5},{11,11}}; - runStatementOnDriver("insert into " + Table.NONACIDORCTBL + " " + makeValuesClause(sourceVals)); - int[][] targetVals = {{2,1},{4,3},{5,6},{7,8}}; - runStatementOnDriver("insert into " + Table.ACIDTBL + " " + makeValuesClause(targetVals)); - String query = "merge into " + Table.ACIDTBL + - " as t using " + Table.NONACIDORCTBL + " s ON t.a = s.a " + - "WHEN MATCHED and s.a < 5 THEN DELETE " + - "WHEN MATCHED AND s.a < 3 THEN update set b = 0 " + - "WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) "; - runStatementOnDriverNegative(query); - runStatementOnDriver("insert into " + Table.ACIDTBLPART + " partition(p) values(1,1,'p1'),(2,2,'p1'),(3,3,'p1'),(4,4,'p2')"); - query = "merge into " + Table.ACIDTBLPART + - " as t using " + Table.NONACIDORCTBL + " s ON t.a = s.a " + - "WHEN MATCHED and s.a < 5 THEN DELETE " + - "WHEN MATCHED AND s.a < 3 THEN update set b = 0 " + - "WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b, 'p1') "; - runStatementOnDriverNegative(query); - } @Test public void testSetClauseFakeColumn() throws Exception { CommandProcessorResponse cpr = runStatementOnDriverNegative("MERGE INTO "+ Table.ACIDTBL + diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index 6718ae9..af1f962 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -120,8 +120,6 @@ protected void setUpWithTableProperties(String tableProperties) throws Exception hiveConf .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); - hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true); - TxnDbUtil.setConfValues(hiveConf); TxnDbUtil.prepDb(); File f = new File(TEST_WAREHOUSE_DIR); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/InputEstimatorTestClass.java ql/src/test/org/apache/hadoop/hive/ql/exec/InputEstimatorTestClass.java deleted file mode 100644 index 8c52979..0000000 --- ql/src/test/org/apache/hadoop/hive/ql/exec/InputEstimatorTestClass.java +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.exec; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.HiveMetaHook; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; -import org.apache.hadoop.hive.ql.metadata.InputEstimator; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; -import org.apache.hadoop.hive.serde2.AbstractSerDe; -import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.OutputFormat; - -import java.util.Map; - -/** - * This is just a helper class to test the InputEstimator object used in some Utilities methods. - */ -public class InputEstimatorTestClass implements HiveStorageHandler, InputEstimator { - private static Estimation expectedEstimation = new Estimation(0, 0); - - public InputEstimatorTestClass() { - } - - public static void setEstimation(Estimation estimation) { - expectedEstimation = estimation; - } - - @Override - public Class getInputFormatClass() { - return null; - } - - @Override - public Class getOutputFormatClass() { - return null; - } - - @Override - public Class getSerDeClass() { - return null; - } - - @Override - public HiveMetaHook getMetaHook() { - return null; - } - - @Override - public HiveAuthorizationProvider getAuthorizationProvider() throws HiveException { - return null; - } - - @Override - public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { - - } - - @Override - public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { - - } - - @Override - public void configureTableJobProperties(TableDesc tableDesc, Map jobProperties) { - - } - - @Override - public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { - - } - - @Override - public void setConf(Configuration conf) { - - } - - @Override - public Configuration getConf() { - return null; - } - - @Override - public Estimation estimate(JobConf job, TableScanOperator ts, long remaining) throws HiveException { - return expectedEstimation; - } -} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java index 5a9d83c..e444946 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java @@ -37,16 +37,13 @@ import java.util.concurrent.Executors; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.ContentSummary; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.io.*; +import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; +import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.InputEstimator; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; @@ -60,12 +57,7 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFromUtcTimestamp; import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.RecordReader; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -361,143 +353,4 @@ public void testGetInputPathsWithPool() throws Exception { } } } - - @Test - public void testGetInputSummaryWithASingleThread() throws IOException { - final int NUM_PARTITIONS = 5; - final int BYTES_PER_FILE = 5; - - JobConf jobConf = new JobConf(); - Properties properties = new Properties(); - - jobConf.setInt("mapred.dfsclient.parallelism.max", 0); - ContentSummary summary = runTestGetInputSummary(jobConf, properties, NUM_PARTITIONS, BYTES_PER_FILE, HiveInputFormat.class); - assertEquals(NUM_PARTITIONS * BYTES_PER_FILE, summary.getLength()); - assertEquals(NUM_PARTITIONS, summary.getFileCount()); - assertEquals(NUM_PARTITIONS, summary.getDirectoryCount()); - } - - @Test - public void testGetInputSummaryWithMultipleThreads() throws IOException { - final int NUM_PARTITIONS = 5; - final int BYTES_PER_FILE = 5; - - JobConf jobConf = new JobConf(); - Properties properties = new Properties(); - - jobConf.setInt("mapred.dfsclient.parallelism.max", 2); - ContentSummary summary = runTestGetInputSummary(jobConf, properties, NUM_PARTITIONS, BYTES_PER_FILE, HiveInputFormat.class); - assertEquals(NUM_PARTITIONS * BYTES_PER_FILE, summary.getLength()); - assertEquals(NUM_PARTITIONS, summary.getFileCount()); - assertEquals(NUM_PARTITIONS, summary.getDirectoryCount()); - } - - @Test - public void testGetInputSummaryWithInputEstimator() throws IOException, HiveException { - final int NUM_PARTITIONS = 5; - final int BYTES_PER_FILE = 10; - final int NUM_OF_ROWS = 5; - - JobConf jobConf = new JobConf(); - Properties properties = new Properties(); - - jobConf.setInt("mapred.dfsclient.parallelism.max", 2); - - properties.setProperty(hive_metastoreConstants.META_TABLE_STORAGE, InputEstimatorTestClass.class.getName()); - InputEstimatorTestClass.setEstimation(new InputEstimator.Estimation(NUM_OF_ROWS, BYTES_PER_FILE)); - - /* Let's write more bytes to the files to test that Estimator is actually working returning the file size not from the filesystem */ - ContentSummary summary = runTestGetInputSummary(jobConf, properties, NUM_PARTITIONS, BYTES_PER_FILE * 2, HiveInputFormat.class); - assertEquals(NUM_PARTITIONS * BYTES_PER_FILE, summary.getLength()); - assertEquals(NUM_PARTITIONS * -1, summary.getFileCount()); // Current getInputSummary() returns -1 for each file found - assertEquals(NUM_PARTITIONS * -1, summary.getDirectoryCount()); // Current getInputSummary() returns -1 for each file found - } - - static class ContentSummaryInputFormatTestClass extends FileInputFormat implements ContentSummaryInputFormat { - private static ContentSummary summary = new ContentSummary.Builder().build(); - - public static void setContentSummary(ContentSummary contentSummary) { - summary = contentSummary; - } - - @Override - public RecordReader getRecordReader(InputSplit inputSplit, JobConf jobConf, Reporter reporter) throws IOException { - return null; - } - - @Override - public ContentSummary getContentSummary(Path p, JobConf job) throws IOException { - return summary; - } - } - - @Test - public void testGetInputSummaryWithContentSummaryInputFormat() throws IOException { - final int NUM_PARTITIONS = 5; - final int 
BYTES_PER_FILE = 10; - - JobConf jobConf = new JobConf(); - Properties properties = new Properties(); - - jobConf.setInt("mapred.dfsclient.parallelism.max", 2); - - ContentSummaryInputFormatTestClass.setContentSummary( - new ContentSummary.Builder().length(BYTES_PER_FILE).fileCount(2).directoryCount(1).build()); - - /* Let's write more bytes to the files to test that ContentSummaryInputFormat is actually working returning the file size not from the filesystem */ - ContentSummary summary = runTestGetInputSummary(jobConf, properties, NUM_PARTITIONS, BYTES_PER_FILE * 2, ContentSummaryInputFormatTestClass.class); - assertEquals(NUM_PARTITIONS * BYTES_PER_FILE, summary.getLength()); - assertEquals(NUM_PARTITIONS * 2, summary.getFileCount()); - assertEquals(NUM_PARTITIONS, summary.getDirectoryCount()); - } - - private ContentSummary runTestGetInputSummary(JobConf jobConf, Properties properties, int numOfPartitions, int bytesPerFile, Class inputFormatClass) throws IOException { - // creates scratch directories needed by the Context object - SessionState.start(new HiveConf()); - - MapWork mapWork = new MapWork(); - Context context = new Context(jobConf); - LinkedHashMap pathToPartitionInfo = new LinkedHashMap<>(); - LinkedHashMap> pathToAliasTable = new LinkedHashMap<>(); - TableScanOperator scanOp = new TableScanOperator(); - - PartitionDesc partitionDesc = new PartitionDesc(new TableDesc(inputFormatClass, null, properties), null); - - String testTableName = "testTable"; - - Path testTablePath = new Path(testTableName); - Path[] testPartitionsPaths = new Path[numOfPartitions]; - for (int i=0; i children1 = new ArrayList(2); - children1.add(notBetween); - children1.add(colExpr); - children1.add(minExpr); - children1.add(maxExpr); - betweenExpr.setChildren(children1); - - Vectorizer v = new Vectorizer(); - Assert.assertTrue(v.validateExprNodeDesc(betweenExpr, Mode.FILTER)); - } } diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestMergeStatement.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestMergeStatement.java index a862e5c..7481e1a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/parse/TestMergeStatement.java +++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestMergeStatement.java @@ -1,20 +1,3 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
 package org.apache.hadoop.hive.ql.parse;
 import org.antlr.runtime.tree.RewriteEmptyStreamException;
diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestQBSubQuery.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestQBSubQuery.java
index 311a34d..f9db2c8 100644
--- ql/src/test/org/apache/hadoop/hive/ql/parse/TestQBSubQuery.java
+++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestQBSubQuery.java
@@ -122,7 +122,7 @@ public void testCheckAggOrWindowing() throws Exception {
 Assert.assertEquals(0, SubQueryUtils.checkAggOrWindowing((ASTNode) select.getChild(0)));
 Assert.assertEquals(1, SubQueryUtils.checkAggOrWindowing((ASTNode) select.getChild(1)));
- Assert.assertEquals(3, SubQueryUtils.checkAggOrWindowing((ASTNode) select.getChild(2)));
+ Assert.assertEquals(2, SubQueryUtils.checkAggOrWindowing((ASTNode) select.getChild(2)));
 }
 private ASTNode where(ASTNode qry) {
diff --git ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java
index 1756711..165c028 100644
--- ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java
+++ ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java
@@ -1,20 +1,3 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
 package org.apache.hadoop.hive.ql.plan;
 import static org.junit.Assert.assertEquals;
diff --git ql/src/test/org/apache/hadoop/hive/ql/plan/TestViewEntity.java ql/src/test/org/apache/hadoop/hive/ql/plan/TestViewEntity.java
index 4cc9d72..fa01416 100644
--- ql/src/test/org/apache/hadoop/hive/ql/plan/TestViewEntity.java
+++ ql/src/test/org/apache/hadoop/hive/ql/plan/TestViewEntity.java
@@ -168,45 +168,8 @@ public void testSubQueryInSubView() throws Exception {
 // table1 and view1 as second read entity
 assertEquals("default@" + view1, CheckInputReadEntity.readEntities[1].getName());
 assertFalse("Table is not direct input", CheckInputReadEntity.readEntities[1].isDirect());
- Set<ReadEntity> parents = CheckInputReadEntity.readEntities[1].getParents();
- assertTrue("Table does not have parent", parents != null && parents.size() > 0);
 assertEquals("default@" + tab1, CheckInputReadEntity.readEntities[2].getName());
 assertFalse("Table is not direct input", CheckInputReadEntity.readEntities[2].isDirect());
 }
-
- /**
- * Verify that the the query with the subquery inside a view will have the correct
- * direct and indirect inputs.
- * @throws Exception - */ - @Test - public void testUnionAllInSubView() throws Exception { - String prefix = "tvunionallinsubview" + NAME_PREFIX; - final String tab1 = prefix + "t"; - final String view1 = prefix + "v"; - final String view2 = prefix + "v2"; - - int ret = driver.run("create table " + tab1 + "(id int)").getResponseCode(); - assertEquals("Checking command success", 0, ret); - ret = driver.run("create view " + view1 + " as select * from " + tab1).getResponseCode(); - assertEquals("Checking command success", 0, ret); - - ret = driver.run("create view " + view2 + " as select * from (select * from " + view1 + " union all select * from " + view1 + ") x").getResponseCode(); - assertEquals("Checking command success", 0, ret); - - driver.compile("select * from " + view2); - // view entity - assertEquals("default@" + view2, CheckInputReadEntity.readEntities[0].getName()); - - // table1 and view1 as second read entity - assertEquals("default@" + view1, CheckInputReadEntity.readEntities[1].getName()); - assertFalse("Table is not direct input", CheckInputReadEntity.readEntities[1].isDirect()); - Set parents = CheckInputReadEntity.readEntities[1].getParents(); - assertTrue("Table does not have parent", parents != null && parents.size() > 0); - assertEquals("default@" + tab1, CheckInputReadEntity.readEntities[2].getName()); - assertFalse("Table is not direct input", CheckInputReadEntity.readEntities[2].isDirect()); - - } - } diff --git ql/src/test/queries/clientnegative/authorization_msck.q ql/src/test/queries/clientnegative/authorization_msck.q deleted file mode 100644 index 8c7edce..0000000 --- ql/src/test/queries/clientnegative/authorization_msck.q +++ /dev/null @@ -1,20 +0,0 @@ -set hive.test.authz.sstd.hs2.mode=true; -set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; -set hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.SessionStateConfigUserAuthenticator; -set hive.security.authorization.enabled=true; -set user.name=user1; - --- check if alter table fails as different user -create table t1(i int); -msck repair table t1; - - -set user.name=user1; -GRANT INSERT ON t1 TO USER user2; - -set user.name=user2; -msck repair table t1; - -set user.name=user3; -msck repair table t1; - diff --git ql/src/test/queries/clientnegative/druid_case.q ql/src/test/queries/clientnegative/druid_case.q deleted file mode 100644 index 4828bf8..0000000 --- ql/src/test/queries/clientnegative/druid_case.q +++ /dev/null @@ -1,6 +0,0 @@ -set hive.strict.checks.cartesian.product=false; -set hive.druid.broker.address.default=localhost.test; - -CREATE EXTERNAL TABLE druid_table_1 -STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler2' -TBLPROPERTIES ("druid.datasource" = "wikipedia"); diff --git ql/src/test/queries/clientnegative/subquery_in_implicit_gby.q ql/src/test/queries/clientnegative/subquery_in_implicit_gby.q index 4e87a33..338747e 100644 --- ql/src/test/queries/clientnegative/subquery_in_implicit_gby.q +++ ql/src/test/queries/clientnegative/subquery_in_implicit_gby.q @@ -1,11 +1 @@ -create table t(i int, j int); -insert into t values(0,1), (0,2); - -create table tt(i int, j int); -insert into tt values(0,3); - --- since this is correlated with COUNT aggregate and subquery returns 0 rows for group by (i=j) it should be a runtime error -select * from t where i IN (select count(i) from tt where tt.j = t.j); - -drop table t; -drop table tt; +explain select * from part where p_partkey IN 
(select count(*) from part pp where pp.p_type = part.p_type); diff --git ql/src/test/queries/clientnegative/subquery_in_on.q ql/src/test/queries/clientnegative/subquery_in_on.q deleted file mode 100644 index 5f44f72..0000000 --- ql/src/test/queries/clientnegative/subquery_in_on.q +++ /dev/null @@ -1,3 +0,0 @@ --- subquery in ON clause -explain SELECT p1.p_name FROM part p1 LEFT JOIN (select p_type as p_col from part ) p2 - ON (select pp1.p_type as p_col from part pp1 where pp1.p_partkey = p2.p_col); \ No newline at end of file diff --git ql/src/test/queries/clientnegative/subquery_notin_implicit_gby.q ql/src/test/queries/clientnegative/subquery_notin_implicit_gby.q deleted file mode 100644 index 367603e..0000000 --- ql/src/test/queries/clientnegative/subquery_notin_implicit_gby.q +++ /dev/null @@ -1,11 +0,0 @@ -create table t(i int, j int); -insert into t values(0,1), (0,2); - -create table tt(i int, j int); -insert into tt values(0,3); - --- since this is correlated with COUNT aggregate and subquery returns 0 rows for group by (i=j) it should be a runtime error -select * from t where i NOT IN (select count(i) from tt where tt.j = t.j); - -drop table t; -drop table tt; diff --git ql/src/test/queries/clientpositive/alter_table_stats_status.q ql/src/test/queries/clientpositive/alter_table_stats_status.q deleted file mode 100644 index 8e07b81..0000000 --- ql/src/test/queries/clientpositive/alter_table_stats_status.q +++ /dev/null @@ -1,48 +0,0 @@ -create database statsdb; -use statsdb; -create table srctable like default.src; -load data local inpath '../../data/files/kv1.txt' overwrite into table srctable; - -analyze table srctable compute statistics; -describe formatted srctable; - -alter table srctable touch; -alter table srctable rename to statstable; - -alter table statstable add columns (newcol string); -alter table statstable change key key string; -alter table statstable set tblproperties('testtblstats'='unchange'); -describe formatted statstable; - -alter table statstable update statistics set ('numRows' = '1000'); -describe formatted statstable; - -analyze table statstable compute statistics; -describe formatted statstable; -alter table statstable set location '${system:test.tmp.dir}/newdir'; -describe formatted statstable; - -drop table statstable; - -create table srcpart like default.srcpart; -load data local inpath '../../data/files/kv1.txt' overwrite into table srcpart partition (ds='2008-04-08', hr='11'); -load data local inpath '../../data/files/kv1.txt' overwrite into table srcpart partition (ds='2008-04-08', hr='12'); - -analyze table srcpart partition (ds='2008-04-08', hr='11') compute statistics; -describe formatted srcpart partition (ds='2008-04-08', hr='11'); - -alter table srcpart touch; -alter table srcpart partition (ds='2008-04-08', hr='11') rename to partition (ds='2017-01-19', hr='11'); -alter table srcpart partition (ds='2017-01-19', hr='11') add columns (newcol string); -alter table srcpart partition (ds='2017-01-19', hr='11') change key key string; -alter table srcpart set tblproperties('testpartstats'='unchange'); -describe formatted srcpart partition (ds='2017-01-19', hr='11'); - -alter table srcpart partition (ds='2017-01-19', hr='11') update statistics set ('numRows' = '1000'); -describe formatted srcpart partition (ds='2017-01-19', hr='11'); - -analyze table srcpart partition (ds='2017-01-19', hr='11') compute statistics; -describe formatted srcpart partition (ds='2017-01-19', hr='11'); - -drop table srcpart; - diff --git 
ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q index e686af6..13797c0 100644 --- ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q +++ ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q @@ -62,18 +62,6 @@ select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcp set hive.tez.dynamic.semijoin.reduction=true; EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); ---set hive.tez.dynamic.semijoin.reduction=false; - --- With Mapjoins. -set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=100000000000; - -EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); -select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); -set hive.tez.dynamic.semijoin.reduction=true; -EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); -select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); drop table srcpart_date; drop table srcpart_small; diff --git ql/src/test/queries/clientpositive/equal_ns.q ql/src/test/queries/clientpositive/equal_ns.q deleted file mode 100644 index 910d089..0000000 --- ql/src/test/queries/clientpositive/equal_ns.q +++ /dev/null @@ -1,6 +0,0 @@ -set hive.mapred.mode=nonstrict; --- SORT_QUERY_RESULTS - -create table test(x string, y string); -insert into test values ('q', 'q'), ('q', 'w'), (NULL, 'q'), ('q', NULL), (NULL, NULL); -select *, x<=>y, not (x<=> y), (x <=> y) = false from test; diff --git ql/src/test/queries/clientpositive/explainuser_3.q ql/src/test/queries/clientpositive/explainuser_3.q index 9c6c9dc..282629e 100644 --- ql/src/test/queries/clientpositive/explainuser_3.q +++ ql/src/test/queries/clientpositive/explainuser_3.q @@ -13,7 +13,6 @@ set hive.vectorized.execution.enabled=true; CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10; -analyze table acid_vectorized compute statistics for columns; explain select a, b from acid_vectorized order by a, b; explain select key, value diff --git ql/src/test/queries/clientpositive/orc_ppd_basic.q ql/src/test/queries/clientpositive/orc_ppd_basic.q index cf5870e..41134a0 100644 --- ql/src/test/queries/clientpositive/orc_ppd_basic.q +++ ql/src/test/queries/clientpositive/orc_ppd_basic.q @@ -3,8 +3,6 @@ set hive.mapred.mode=nonstrict; SET hive.fetch.task.conversion=none; SET hive.optimize.index.filter=true; SET hive.cbo.enable=false; -SET hive.map.aggr=false; --- disabling map side aggregation as that can lead to different intermediate record counts CREATE TABLE staging(t tinyint, si smallint, @@ -34,16 +32,17 @@ CREATE TABLE orc_ppd_staging(t tinyint, c char(50), v varchar(50), da date, + ts timestamp, dec decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); -insert overwrite table orc_ppd_staging select t, 
si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), dec, bin from staging order by t, s; +insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s; -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values -- which makes it hard to test bloom filters -insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11",-71.54,"aaa" from staging limit 1; -insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11",71.54,"zzz" from staging limit 1; +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1; +insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1; CREATE TABLE orc_ppd(t tinyint, si smallint, @@ -56,11 +55,12 @@ CREATE TABLE orc_ppd(t tinyint, c char(50), v varchar(50), da date, + ts timestamp, dec decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); -insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), da, dec, bin from orc_ppd_staging order by t, s; +insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s; SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter; @@ -197,9 +197,6 @@ set hive.mapred.mode=nonstrict; SET hive.fetch.task.conversion=none; SET hive.optimize.index.filter=true; SET hive.cbo.enable=false; -SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecOrcRowGroupCountPrinter; --- these tests include timestamp column that will impact the file size when tests run across --- different timezones. So we print only the selected row group count instead of entire tez exeuction summary. 
create temporary table tmp_orcppd stored as orc as select ctinyint, csmallint, cint , cbigint, cfloat, cdouble, @@ -236,13 +233,13 @@ set hive.optimize.index.filter=true; drop table if exists tmp_orcppd; create temporary table tmp_orcppd stored as orc - as select ts, cast(ts as date) - from staging ; + as select ts, da + from orc_ppd_staging ; insert into table tmp_orcppd values(null, null); drop table if exists tbl_orcppd_3_1; create table tbl_orcppd_3_1 as select count(*) from tmp_orcppd - group by ts, cast(ts as date) + group by ts, da having ts in (select ctimestamp1 from alltypesorc limit 10); diff --git ql/src/test/queries/clientpositive/orc_ppd_schema_evol_1a.q ql/src/test/queries/clientpositive/orc_ppd_schema_evol_1a.q index 60408db..f52c2ac 100644 --- ql/src/test/queries/clientpositive/orc_ppd_schema_evol_1a.q +++ ql/src/test/queries/clientpositive/orc_ppd_schema_evol_1a.q @@ -3,8 +3,6 @@ set hive.metastore.disallow.incompatible.col.type.changes=true; set hive.optimize.ppd=false; set hive.optimize.index.filter=false; set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; -SET hive.map.aggr=false; --- disabling map side aggregation as that can lead to different intermediate record counts create table unique_1( @@ -41,4 +39,4 @@ select s from test1 where i = '-1591211872'; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select s from test1 where i = -1591211872; +select s from test1 where i = -1591211872; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/orc_ppd_schema_evol_1b.q ql/src/test/queries/clientpositive/orc_ppd_schema_evol_1b.q index c1be125..e669f06 100644 --- ql/src/test/queries/clientpositive/orc_ppd_schema_evol_1b.q +++ ql/src/test/queries/clientpositive/orc_ppd_schema_evol_1b.q @@ -3,8 +3,6 @@ set hive.metastore.disallow.incompatible.col.type.changes=true; set hive.optimize.ppd=false; set hive.optimize.index.filter=false; set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; -SET hive.map.aggr=false; --- disabling map side aggregation as that can lead to different intermediate record counts create table unique_1( i int, diff --git ql/src/test/queries/clientpositive/orc_ppd_schema_evol_2a.q ql/src/test/queries/clientpositive/orc_ppd_schema_evol_2a.q index 8951496..609924d 100644 --- ql/src/test/queries/clientpositive/orc_ppd_schema_evol_2a.q +++ ql/src/test/queries/clientpositive/orc_ppd_schema_evol_2a.q @@ -3,8 +3,6 @@ set hive.metastore.disallow.incompatible.col.type.changes=false; set hive.optimize.ppd=false; set hive.optimize.index.filter=false; set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; -SET hive.map.aggr=false; --- disabling map side aggregation as that can lead to different intermediate record counts @@ -42,4 +40,4 @@ select s from test1 where d = -4996703.42; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -select s from test1 where d = -4996703.42; +select s from test1 where d = -4996703.42; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/orc_ppd_schema_evol_2b.q ql/src/test/queries/clientpositive/orc_ppd_schema_evol_2b.q index f6e3133..edebeef 100644 --- ql/src/test/queries/clientpositive/orc_ppd_schema_evol_2b.q +++ ql/src/test/queries/clientpositive/orc_ppd_schema_evol_2b.q @@ -3,8 +3,6 @@ set hive.metastore.disallow.incompatible.col.type.changes=false; set hive.optimize.ppd=false; set hive.optimize.index.filter=false; set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; 
-SET hive.map.aggr=false; --- disabling map side aggregation as that can lead to different intermediate record counts create table unique_1( diff --git ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q index 5b6731d..88a94eb 100644 --- ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q +++ ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q @@ -1,8 +1,6 @@ set hive.mapred.mode=nonstrict; SET hive.fetch.task.conversion=none; SET hive.cbo.enable=false; -SET hive.map.aggr=false; --- disabling map side aggregation as that can lead to different intermediate record counts CREATE TABLE staging(t tinyint, si smallint, @@ -32,16 +30,17 @@ CREATE TABLE orc_ppd_staging(t tinyint, c char(50), v varchar(50), da date, + ts timestamp, dec decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); -insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), dec, bin from staging order by t, s; +insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s; -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values -- which makes it hard to test bloom filters -insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11",-71.54,"aaa" from staging limit 1; -insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11",71.54,"zzz" from staging limit 1; +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1; +insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1; CREATE TABLE orc_ppd(t tinyint, si smallint, @@ -54,11 +53,12 @@ CREATE TABLE orc_ppd(t tinyint, c char(50), v varchar(50), da date, + ts timestamp, dec decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); -insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), da, dec, bin from orc_ppd_staging order by t, s; +insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s; SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter; SET hive.optimize.index.filter=false; diff --git ql/src/test/queries/clientpositive/stats_missing_warning.q ql/src/test/queries/clientpositive/stats_missing_warning.q deleted file mode 100644 index b6cf049..0000000 --- ql/src/test/queries/clientpositive/stats_missing_warning.q +++ /dev/null @@ -1,55 +0,0 @@ -set hive.stats.autogather=false; - --- Explictily DROP vs. 
CREATE IF NOT EXISTS to ensure stats are not carried over -DROP TABLE IF EXISTS missing_stats_t1; -DROP TABLE IF EXISTS missing_stats_t2; -DROP TABLE IF EXISTS missing_stats_t3; -CREATE TABLE missing_stats_t1 (key STRING, value STRING); -CREATE TABLE missing_stats_t2 (key STRING, value STRING); -CREATE TABLE missing_stats_t3 (key STRING, value STRING); - -INSERT INTO missing_stats_t1 (key, value) - SELECT key, value - FROM src; - -INSERT INTO missing_stats_t2 (key, value) - SELECT key, value - FROM src; - -INSERT INTO missing_stats_t3 (key, value) - SELECT key, value - FROM src; - --- Default should be FALSE -set hive.cbo.show.warnings=true; - -set hive.cbo.enable=true; - --- Should print warning -set hive.cbo.show.warnings=true; - -SELECT COUNT(*) -FROM missing_stats_t1 t1 -JOIN missing_stats_t2 t2 ON t1.value = t2.key -JOIN missing_stats_t3 t3 ON t2.key = t3.value; - --- Should not print warning -set hive.cbo.show.warnings=false; - -SELECT COUNT(*) -FROM missing_stats_t1 t1 -JOIN missing_stats_t2 t2 ON t1.value = t2.key -JOIN missing_stats_t3 t3 ON t2.key = t3.value; - -ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS; -ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS; -ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS; - - --- Warning should be gone -set hive.cbo.show.warnings=true; - -SELECT COUNT(*) -FROM missing_stats_t1 t1 -JOIN missing_stats_t2 t2 ON t1.value = t2.key -JOIN missing_stats_t3 t3 ON t2.key = t3.value; diff --git ql/src/test/queries/clientpositive/subquery_in.q ql/src/test/queries/clientpositive/subquery_in.q index 4ba170a..7293c77 100644 --- ql/src/test/queries/clientpositive/subquery_in.q +++ ql/src/test/queries/clientpositive/subquery_in.q @@ -55,24 +55,6 @@ part where part.p_size in ) ; --- agg, corr -explain -select p_mfgr, p_name, p_size -from part b where b.p_size in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) -; - -select p_mfgr, p_name, p_size -from part b where b.p_size in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) -; - -- distinct, corr explain select * @@ -206,26 +188,6 @@ explain select p_partkey from select p_partkey from (select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq; --- corr IN with COUNT aggregate -explain select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type); -select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type); - --- corr IN with aggregate other than COUNT -explain select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey); -select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey); - --- corr IN with aggregate other than COUNT (MIN) with non-equi join -explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey); -select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey); - --- corr IN with COUNT aggregate -explain select * from part where p_size NOT IN (select count(*) from part pp where pp.p_type = part.p_type); -select * from part where p_size NOT IN (select count(*) from 
part pp where pp.p_type = part.p_type); - --- corr IN with aggregate other than COUNT -explain select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey); -select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey); - create table t(i int); insert into t values(1); insert into t values(0); @@ -254,16 +216,3 @@ select * from part where p_size IN (select i from tnull); select * from tnull where i IN (select i from tnull); drop table tempty; - -create table t(i int, j int); -insert into t values(0,1), (0,2); - -create table tt(i int, j int); -insert into tt values(0,3); - --- corr IN with aggregate other than COUNT return zero rows -explain select * from t where i IN (select sum(i) from tt where tt.j = t.j); -select * from t where i IN (select sum(i) from tt where tt.j = t.j); - -drop table t; -drop table tt; diff --git ql/src/test/queries/clientpositive/subquery_notexists.q ql/src/test/queries/clientpositive/subquery_notexists.q index 4103603..dc48c3e 100644 --- ql/src/test/queries/clientpositive/subquery_notexists.q +++ ql/src/test/queries/clientpositive/subquery_notexists.q @@ -58,10 +58,4 @@ where not exists from src a where b.value <> a.value and a.key > b.key and a.value > 'val_2' ) -; - --- bug in decorrelation where HiveProject gets multiple column with same name -explain SELECT p1.p_name FROM part p1 LEFT JOIN (select p_type as p_col from part ) p2 WHERE NOT EXISTS - (select pp1.p_type as p_col from part pp1 where pp1.p_partkey = p2.p_col); -SELECT p1.p_name FROM part p1 LEFT JOIN (select p_type as p_col from part ) p2 WHERE NOT EXISTS - (select pp1.p_type as p_col from part pp1 where pp1.p_partkey = p2.p_col); +; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_notin.q ql/src/test/queries/clientpositive/subquery_notin.q index e23eb2b..0883c89 100644 --- ql/src/test/queries/clientpositive/subquery_notin.q +++ ql/src/test/queries/clientpositive/subquery_notin.q @@ -57,24 +57,6 @@ part where part.p_size not in order by p_name, p_size ; --- agg, corr -explain -select p_mfgr, p_name, p_size -from part b where b.p_size not in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) -; - -select p_mfgr, p_name, p_size -from part b where b.p_size not in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) -; - -- non agg, non corr, Group By in Parent Query select li.l_partkey, count(*) from lineitem li diff --git ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q deleted file mode 100644 index e1eefff..0000000 --- ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q +++ /dev/null @@ -1,43 +0,0 @@ -set hive.compute.query.using.stats=false; -set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.optimize.ppd=true; -set hive.ppd.remove.duplicatefilters=true; -set hive.tez.dynamic.partition.pruning=true; -set hive.tez.dynamic.semijoin.reduction=true; -set hive.optimize.metadataonly=false; -set hive.optimize.index.filter=true; - -set hive.vectorized.adaptor.usage.mode=none; -set hive.vectorized.execution.enabled=true; - --- Create Tables -create table dsrv_big stored as orc as select key as key_str, cast(key as 
int) as key_int, value from src; -create table dsrv_small stored as orc as select distinct key as key_str, cast(key as int) as key_int, value from src where key < 100; - --- single key (int) -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int); -select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int); - --- single key (string) -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str); -select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str); - --- keys are different type -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str); -select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_str); - --- multiple tables -EXPLAIN select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int; -select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int; - --- multiple keys -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int); -select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int); - --- small table result is empty -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2'); -select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2'); - -drop table dsrv_big; -drop table dsrv_small; diff --git ql/src/test/results/clientnegative/authorization_msck.q.out ql/src/test/results/clientnegative/authorization_msck.q.out deleted file mode 100644 index 7e36488..0000000 --- ql/src/test/results/clientnegative/authorization_msck.q.out +++ /dev/null @@ -1,27 +0,0 @@ -PREHOOK: query: create table t1(i int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t1 -POSTHOOK: query: create table t1(i int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t1 -PREHOOK: query: msck repair table t1 -PREHOOK: type: MSCK -PREHOOK: Output: default@t1 -POSTHOOK: query: msck repair table t1 -POSTHOOK: type: MSCK -POSTHOOK: Output: default@t1 -PREHOOK: query: GRANT INSERT ON t1 TO USER user2 -PREHOOK: type: GRANT_PRIVILEGE -PREHOOK: Output: default@t1 -POSTHOOK: query: GRANT INSERT ON t1 TO USER user2 -POSTHOOK: type: GRANT_PRIVILEGE -POSTHOOK: Output: default@t1 -PREHOOK: query: msck repair table t1 -PREHOOK: type: MSCK -PREHOOK: Output: default@t1 -POSTHOOK: query: msck repair table t1 -POSTHOOK: type: MSCK -POSTHOOK: Output: default@t1 -FAILED: HiveAccessControlException Permission denied: Principal [name=user3, type=USER] does not have following privileges for operation MSCK [[INSERT] on Object [type=TABLE_OR_VIEW, name=default.t1]] diff --git ql/src/test/results/clientnegative/druid_case.q.out ql/src/test/results/clientnegative/druid_case.q.out deleted file mode 100644 index 457028b..0000000 --- ql/src/test/results/clientnegative/druid_case.q.out +++ /dev/null @@ -1,7 +0,0 @@ -PREHOOK: query: CREATE EXTERNAL TABLE druid_table_1 -STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler2' -TBLPROPERTIES ("druid.datasource" = "wikipedia") -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@druid_table_1 -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. 
org.apache.hadoop.hive.ql.metadata.HiveException: Duplicate column name anonymous in the table definition. diff --git ql/src/test/results/clientnegative/msck_repair_1.q.out ql/src/test/results/clientnegative/msck_repair_1.q.out index 174419f..c5f644d 100644 --- ql/src/test/results/clientnegative/msck_repair_1.q.out +++ ql/src/test/results/clientnegative/msck_repair_1.q.out @@ -12,11 +12,8 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask diff --git ql/src/test/results/clientnegative/msck_repair_2.q.out ql/src/test/results/clientnegative/msck_repair_2.q.out index 174419f..c5f644d 100644 --- ql/src/test/results/clientnegative/msck_repair_2.q.out +++ ql/src/test/results/clientnegative/msck_repair_2.q.out @@ -12,11 +12,8 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask diff --git ql/src/test/results/clientnegative/msck_repair_3.q.out ql/src/test/results/clientnegative/msck_repair_3.q.out index 174419f..c5f644d 100644 --- ql/src/test/results/clientnegative/msck_repair_3.q.out +++ ql/src/test/results/clientnegative/msck_repair_3.q.out @@ -12,11 +12,8 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask diff --git ql/src/test/results/clientnegative/subquery_exists_implicit_gby.q.out ql/src/test/results/clientnegative/subquery_exists_implicit_gby.q.out index b650309..bab6138 100644 --- ql/src/test/results/clientnegative/subquery_exists_implicit_gby.q.out +++ ql/src/test/results/clientnegative/subquery_exists_implicit_gby.q.out @@ -1 +1 @@ -FAILED: SemanticException [Error 10250]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 7:7 Invalid SubQuery expression ''val_9'': A predicate on EXISTS/NOT EXISTS SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten. +FAILED: SemanticException [Error 10250]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 7:7 Invalid SubQuery expression ''val_9'': A predicate on EXISTS/NOT EXISTS/IN/NOT IN SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten. 
diff --git ql/src/test/results/clientnegative/subquery_in_groupby.q.out ql/src/test/results/clientnegative/subquery_in_groupby.q.out
index 5c69690..a546d49 100644
--- ql/src/test/results/clientnegative/subquery_in_groupby.q.out
+++ ql/src/test/results/clientnegative/subquery_in_groupby.q.out
@@ -1 +1 @@
-FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates
+FAILED: SemanticException [Error 10249]: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates
diff --git ql/src/test/results/clientnegative/subquery_in_implicit_gby.q.out ql/src/test/results/clientnegative/subquery_in_implicit_gby.q.out
index 3ce8cc1..a882fbc 100644
--- ql/src/test/results/clientnegative/subquery_in_implicit_gby.q.out
+++ ql/src/test/results/clientnegative/subquery_in_implicit_gby.q.out
@@ -1,38 +1 @@
-PREHOOK: query: create table t(i int, j int)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@t
-POSTHOOK: query: create table t(i int, j int)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@t
-PREHOOK: query: insert into t values(0,1), (0,2)
-PREHOOK: type: QUERY
-PREHOOK: Output: default@t
-POSTHOOK: query: insert into t values(0,1), (0,2)
-POSTHOOK: type: QUERY
-POSTHOOK: Output: default@t
-POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-POSTHOOK: Lineage: t.j EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: create table tt(i int, j int)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@tt
-POSTHOOK: query: create table tt(i int, j int)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@tt
-PREHOOK: query: insert into tt values(0,3)
-PREHOOK: type: QUERY
-PREHOOK: Output: default@tt
-POSTHOOK: query: insert into tt values(0,3)
-POSTHOOK: type: QUERY
-POSTHOOK: Output: default@tt
-POSTHOOK: Lineage: tt.i EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-POSTHOOK: Lineage: tt.j EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: select * from t where i IN (select count(i) from tt where tt.j = t.j)
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t
-PREHOOK: Input: default@tt
-#### A masked pattern was here ####
-FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask
+FAILED: SemanticException [Error 10250]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 1:68 Invalid SubQuery expression 'p_type': A predicate on EXISTS/NOT EXISTS/IN/NOT IN SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten.
diff --git ql/src/test/results/clientnegative/subquery_in_on.q.out ql/src/test/results/clientnegative/subquery_in_on.q.out
deleted file mode 100644
index 551f0ec..0000000
--- ql/src/test/results/clientnegative/subquery_in_on.q.out
+++ /dev/null
@@ -1 +0,0 @@
-FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: 2:77 Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates. Error encountered near token 'p2'
diff --git ql/src/test/results/clientnegative/subquery_in_select.q.out ql/src/test/results/clientnegative/subquery_in_select.q.out
index 5c69690..a546d49 100644
--- ql/src/test/results/clientnegative/subquery_in_select.q.out
+++ ql/src/test/results/clientnegative/subquery_in_select.q.out
@@ -1 +1 @@
-FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates
+FAILED: SemanticException [Error 10249]: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates
diff --git ql/src/test/results/clientnegative/subquery_notexists_implicit_gby.q.out ql/src/test/results/clientnegative/subquery_notexists_implicit_gby.q.out
index b650309..bab6138 100644
--- ql/src/test/results/clientnegative/subquery_notexists_implicit_gby.q.out
+++ ql/src/test/results/clientnegative/subquery_notexists_implicit_gby.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10250]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 7:7 Invalid SubQuery expression ''val_9'': A predicate on EXISTS/NOT EXISTS SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten.
+FAILED: SemanticException [Error 10250]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 7:7 Invalid SubQuery expression ''val_9'': A predicate on EXISTS/NOT EXISTS/IN/NOT IN SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten.
diff --git ql/src/test/results/clientnegative/subquery_notin_implicit_gby.q.out ql/src/test/results/clientnegative/subquery_notin_implicit_gby.q.out deleted file mode 100644 index a208951..0000000 --- ql/src/test/results/clientnegative/subquery_notin_implicit_gby.q.out +++ /dev/null @@ -1,38 +0,0 @@ -PREHOOK: query: create table t(i int, j int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t -POSTHOOK: query: create table t(i int, j int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t -PREHOOK: query: insert into t values(0,1), (0,2) -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values(0,1), (0,2) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: t.j EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: create table tt(i int, j int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tt -POSTHOOK: query: create table tt(i int, j int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tt -PREHOOK: query: insert into tt values(0,3) -PREHOOK: type: QUERY -PREHOOK: Output: default@tt -POSTHOOK: query: insert into tt values(0,3) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@tt -POSTHOOK: Lineage: tt.i EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: tt.j EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: select * from t where i NOT IN (select count(i) from tt where tt.j = t.j) -PREHOOK: type: QUERY -PREHOOK: Input: default@t -PREHOOK: Input: default@tt -#### A masked pattern was here #### -FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask diff --git ql/src/test/results/clientnegative/table_nonprintable_negative.q.out ql/src/test/results/clientnegative/table_nonprintable_negative.q.out index 8b22480..15af756 100644 --- ql/src/test/results/clientnegative/table_nonprintable_negative.q.out +++ ql/src/test/results/clientnegative/table_nonprintable_negative.q.out @@ -16,5 +16,4 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@table_external PREHOOK: query: msck repair table table_external PREHOOK: type: MSCK -PREHOOK: Output: default@table_external FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask diff --git ql/src/test/results/clientnegative/unset_table_property.q.out ql/src/test/results/clientnegative/unset_table_property.q.out index 4aedfc5..0705b92 100644 --- ql/src/test/results/clientnegative/unset_table_property.q.out +++ ql/src/test/results/clientnegative/unset_table_property.q.out @@ -18,7 +18,6 @@ PREHOOK: query: SHOW TBLPROPERTIES testTable PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: SHOW TBLPROPERTIES testTable POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} a 1 c 3 #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/acid_subquery.q.out ql/src/test/results/clientpositive/acid_subquery.q.out index f3c44b1..b6402e4 100644 --- ql/src/test/results/clientpositive/acid_subquery.q.out +++ 
ql/src/test/results/clientpositive/acid_subquery.q.out @@ -75,7 +75,6 @@ PREHOOK: Input: default@target PREHOOK: Input: default@target@p=1/q=2 PREHOOK: Input: default@target@p=1/q=3 PREHOOK: Input: default@target@p=2/q=2 -PREHOOK: Output: default@merge_tmp_table PREHOOK: Output: default@target PREHOOK: Output: default@target@p=1/q=2 PREHOOK: Output: default@target@p=1/q=2 @@ -90,11 +89,9 @@ POSTHOOK: Input: default@target POSTHOOK: Input: default@target@p=1/q=2 POSTHOOK: Input: default@target@p=1/q=3 POSTHOOK: Input: default@target@p=2/q=2 -POSTHOOK: Output: default@merge_tmp_table POSTHOOK: Output: default@target@p=1/q=2 POSTHOOK: Output: default@target@p=1/q=2 POSTHOOK: Output: default@target@p=1/q=3 POSTHOOK: Output: default@target@p=1/q=3 POSTHOOK: Output: default@target@p=2/q=2 POSTHOOK: Output: default@target@p=2/q=2 -POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(target)t.FieldSchema(name:ROW__ID, type:struct, comment:), (target)t.FieldSchema(name:p, type:int, comment:null), (target)t.FieldSchema(name:q, type:int, comment:null), ] diff --git ql/src/test/results/clientpositive/alter_file_format.q.out ql/src/test/results/clientpositive/alter_file_format.q.out index ca569be..a69b423 100644 --- ql/src/test/results/clientpositive/alter_file_format.q.out +++ ql/src/test/results/clientpositive/alter_file_format.q.out @@ -67,7 +67,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -111,7 +110,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -155,7 +153,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -199,7 +196,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -243,7 +239,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -287,7 +282,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 diff --git ql/src/test/results/clientpositive/alter_skewed_table.q.out ql/src/test/results/clientpositive/alter_skewed_table.q.out index fefef4c..0f60ba3 100644 --- ql/src/test/results/clientpositive/alter_skewed_table.q.out +++ ql/src/test/results/clientpositive/alter_skewed_table.q.out @@ -67,7 +67,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -170,7 +169,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -269,7 +267,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 diff --git ql/src/test/results/clientpositive/alter_table_not_sorted.q.out ql/src/test/results/clientpositive/alter_table_not_sorted.q.out index 5afb7fa..566b804 100644 --- ql/src/test/results/clientpositive/alter_table_not_sorted.q.out +++ ql/src/test/results/clientpositive/alter_table_not_sorted.q.out @@ -68,7 +68,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} SORTBUCKETCOLSPREFIX TRUE #### A masked pattern was here #### numFiles 0 diff --git ql/src/test/results/clientpositive/alter_table_stats_status.q.out ql/src/test/results/clientpositive/alter_table_stats_status.q.out deleted file mode 100644 index 3404f88..0000000 --- ql/src/test/results/clientpositive/alter_table_stats_status.q.out +++ /dev/null @@ -1,572 +0,0 @@ -PREHOOK: query: create database statsdb -PREHOOK: type: CREATEDATABASE -PREHOOK: Output: database:statsdb -POSTHOOK: query: create database statsdb -POSTHOOK: type: CREATEDATABASE -POSTHOOK: Output: database:statsdb -PREHOOK: query: use statsdb -PREHOOK: type: SWITCHDATABASE -PREHOOK: Input: database:statsdb -POSTHOOK: query: use statsdb -POSTHOOK: type: SWITCHDATABASE -POSTHOOK: Input: database:statsdb -PREHOOK: query: create table srctable like default.src -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:statsdb -PREHOOK: Output: statsdb@srctable -POSTHOOK: query: create table srctable like default.src -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:statsdb -POSTHOOK: Output: statsdb@srctable -PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table srctable -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: statsdb@srctable -POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table srctable -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: statsdb@srctable -PREHOOK: query: analyze table srctable compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: statsdb@srctable -PREHOOK: Output: statsdb@srctable -POSTHOOK: query: analyze table srctable compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: statsdb@srctable -POSTHOOK: Output: statsdb@srctable -PREHOOK: query: describe formatted srctable -PREHOOK: type: DESCTABLE -PREHOOK: Input: statsdb@srctable -POSTHOOK: query: describe formatted srctable -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: statsdb@srctable -# col_name data_type comment - -key string default -value string default - -# Detailed Table Information -Database: statsdb -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 1 - numRows 500 - rawDataSize 5312 - totalSize 5812 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: alter table srctable touch -PREHOOK: type: ALTERTABLE_TOUCH -PREHOOK: Input: statsdb@srctable -PREHOOK: Output: statsdb@srctable -POSTHOOK: query: alter table srctable touch -POSTHOOK: type: ALTERTABLE_TOUCH -POSTHOOK: Input: statsdb@srctable 
-POSTHOOK: Output: statsdb@srctable -PREHOOK: query: alter table srctable rename to statstable -PREHOOK: type: ALTERTABLE_RENAME -PREHOOK: Input: statsdb@srctable -PREHOOK: Output: statsdb@srctable -POSTHOOK: query: alter table srctable rename to statstable -POSTHOOK: type: ALTERTABLE_RENAME -POSTHOOK: Input: statsdb@srctable -POSTHOOK: Output: statsdb@srctable -POSTHOOK: Output: statsdb@statstable -PREHOOK: query: alter table statstable add columns (newcol string) -PREHOOK: type: ALTERTABLE_ADDCOLS -PREHOOK: Input: statsdb@statstable -PREHOOK: Output: statsdb@statstable -POSTHOOK: query: alter table statstable add columns (newcol string) -POSTHOOK: type: ALTERTABLE_ADDCOLS -POSTHOOK: Input: statsdb@statstable -POSTHOOK: Output: statsdb@statstable -PREHOOK: query: alter table statstable change key key string -PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: statsdb@statstable -PREHOOK: Output: statsdb@statstable -POSTHOOK: query: alter table statstable change key key string -POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: statsdb@statstable -POSTHOOK: Output: statsdb@statstable -PREHOOK: query: alter table statstable set tblproperties('testtblstats'='unchange') -PREHOOK: type: ALTERTABLE_PROPERTIES -PREHOOK: Input: statsdb@statstable -PREHOOK: Output: statsdb@statstable -POSTHOOK: query: alter table statstable set tblproperties('testtblstats'='unchange') -POSTHOOK: type: ALTERTABLE_PROPERTIES -POSTHOOK: Input: statsdb@statstable -POSTHOOK: Output: statsdb@statstable -PREHOOK: query: describe formatted statstable -PREHOOK: type: DESCTABLE -PREHOOK: Input: statsdb@statstable -POSTHOOK: query: describe formatted statstable -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: statsdb@statstable -# col_name data_type comment - -key string default -value string default -newcol string - -# Detailed Table Information -Database: statsdb -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} -#### A masked pattern was here #### - numFiles 1 - numRows 500 - rawDataSize 5312 - testtblstats unchange - totalSize 5812 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: alter table statstable update statistics set ('numRows' = '1000') -PREHOOK: type: ALTERTABLE_UPDATETABLESTATS -PREHOOK: Input: statsdb@statstable -PREHOOK: Output: statsdb@statstable -POSTHOOK: query: alter table statstable update statistics set ('numRows' = '1000') -POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS -POSTHOOK: Input: statsdb@statstable -POSTHOOK: Output: statsdb@statstable -PREHOOK: query: describe formatted statstable -PREHOOK: type: DESCTABLE -PREHOOK: Input: statsdb@statstable -POSTHOOK: query: describe formatted statstable -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: statsdb@statstable -# col_name data_type comment - -key string default -value string default -newcol string - -# Detailed Table Information -Database: statsdb -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: -#### A masked pattern was here #### - numFiles 1 - numRows 1000 - rawDataSize 5312 - testtblstats unchange 
- totalSize 5812 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: analyze table statstable compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: statsdb@statstable -PREHOOK: Output: statsdb@statstable -POSTHOOK: query: analyze table statstable compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: statsdb@statstable -POSTHOOK: Output: statsdb@statstable -PREHOOK: query: describe formatted statstable -PREHOOK: type: DESCTABLE -PREHOOK: Input: statsdb@statstable -POSTHOOK: query: describe formatted statstable -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: statsdb@statstable -# col_name data_type comment - -key string default -value string default -newcol string - -# Detailed Table Information -Database: statsdb -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} -#### A masked pattern was here #### - numFiles 1 - numRows 500 - rawDataSize 5312 - testtblstats unchange - totalSize 5812 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -#### A masked pattern was here #### -PREHOOK: type: ALTERTABLE_LOCATION -PREHOOK: Input: statsdb@statstable -#### A masked pattern was here #### -PREHOOK: Output: statsdb@statstable -#### A masked pattern was here #### -POSTHOOK: type: ALTERTABLE_LOCATION -POSTHOOK: Input: statsdb@statstable -#### A masked pattern was here #### -POSTHOOK: Output: statsdb@statstable -PREHOOK: query: describe formatted statstable -PREHOOK: type: DESCTABLE -PREHOOK: Input: statsdb@statstable -POSTHOOK: query: describe formatted statstable -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: statsdb@statstable -# col_name data_type comment - -key string default -value string default -newcol string - -# Detailed Table Information -Database: statsdb -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: -#### A masked pattern was here #### - numFiles 0 - numRows 500 - rawDataSize 5312 - testtblstats unchange - totalSize 0 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table statstable -PREHOOK: type: DROPTABLE -PREHOOK: Input: statsdb@statstable -PREHOOK: Output: statsdb@statstable -POSTHOOK: query: drop table statstable -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: statsdb@statstable -POSTHOOK: Output: statsdb@statstable -PREHOOK: query: create table srcpart like default.srcpart -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:statsdb -PREHOOK: Output: statsdb@srcpart 
-POSTHOOK: query: create table srcpart like default.srcpart -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:statsdb -POSTHOOK: Output: statsdb@srcpart -PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table srcpart partition (ds='2008-04-08', hr='11') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: statsdb@srcpart -POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table srcpart partition (ds='2008-04-08', hr='11') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: statsdb@srcpart -POSTHOOK: Output: statsdb@srcpart@ds=2008-04-08/hr=11 -PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table srcpart partition (ds='2008-04-08', hr='12') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: statsdb@srcpart -POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table srcpart partition (ds='2008-04-08', hr='12') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: statsdb@srcpart -POSTHOOK: Output: statsdb@srcpart@ds=2008-04-08/hr=12 -PREHOOK: query: analyze table srcpart partition (ds='2008-04-08', hr='11') compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: statsdb@srcpart -PREHOOK: Input: statsdb@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Output: statsdb@srcpart -PREHOOK: Output: statsdb@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: query: analyze table srcpart partition (ds='2008-04-08', hr='11') compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: statsdb@srcpart -POSTHOOK: Input: statsdb@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: statsdb@srcpart -POSTHOOK: Output: statsdb@srcpart@ds=2008-04-08/hr=11 -PREHOOK: query: describe formatted srcpart partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: statsdb@srcpart -POSTHOOK: query: describe formatted srcpart partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: statsdb@srcpart -# col_name data_type comment - -key string default -value string default - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: statsdb -Table: srcpart -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 1 - numRows 500 - rawDataSize 5312 - totalSize 5812 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: alter table srcpart touch -PREHOOK: type: ALTERTABLE_TOUCH -PREHOOK: Input: statsdb@srcpart -PREHOOK: Output: statsdb@srcpart -POSTHOOK: query: alter table srcpart touch -POSTHOOK: type: ALTERTABLE_TOUCH -POSTHOOK: Input: statsdb@srcpart -POSTHOOK: Output: statsdb@srcpart -PREHOOK: query: alter table srcpart partition (ds='2008-04-08', hr='11') rename to partition (ds='2017-01-19', hr='11') -PREHOOK: type: ALTERTABLE_RENAMEPART -PREHOOK: Input: statsdb@srcpart -PREHOOK: Output: statsdb@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: query: alter table srcpart partition (ds='2008-04-08', hr='11') rename to partition (ds='2017-01-19', hr='11') -POSTHOOK: type: ALTERTABLE_RENAMEPART 
-POSTHOOK: Input: statsdb@srcpart -POSTHOOK: Input: statsdb@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: statsdb@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: statsdb@srcpart@ds=2017-01-19/hr=11 -PREHOOK: query: alter table srcpart partition (ds='2017-01-19', hr='11') add columns (newcol string) -PREHOOK: type: ALTERTABLE_ADDCOLS -PREHOOK: Input: statsdb@srcpart -PREHOOK: Output: statsdb@srcpart@ds=2017-01-19/hr=11 -POSTHOOK: query: alter table srcpart partition (ds='2017-01-19', hr='11') add columns (newcol string) -POSTHOOK: type: ALTERTABLE_ADDCOLS -POSTHOOK: Input: statsdb@srcpart -POSTHOOK: Input: statsdb@srcpart@ds=2017-01-19/hr=11 -POSTHOOK: Output: statsdb@srcpart@ds=2017-01-19/hr=11 -PREHOOK: query: alter table srcpart partition (ds='2017-01-19', hr='11') change key key string -PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: statsdb@srcpart -PREHOOK: Output: statsdb@srcpart@ds=2017-01-19/hr=11 -POSTHOOK: query: alter table srcpart partition (ds='2017-01-19', hr='11') change key key string -POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: statsdb@srcpart -POSTHOOK: Input: statsdb@srcpart@ds=2017-01-19/hr=11 -POSTHOOK: Output: statsdb@srcpart@ds=2017-01-19/hr=11 -PREHOOK: query: alter table srcpart set tblproperties('testpartstats'='unchange') -PREHOOK: type: ALTERTABLE_PROPERTIES -PREHOOK: Input: statsdb@srcpart -PREHOOK: Output: statsdb@srcpart -POSTHOOK: query: alter table srcpart set tblproperties('testpartstats'='unchange') -POSTHOOK: type: ALTERTABLE_PROPERTIES -POSTHOOK: Input: statsdb@srcpart -POSTHOOK: Output: statsdb@srcpart -PREHOOK: query: describe formatted srcpart partition (ds='2017-01-19', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: statsdb@srcpart -POSTHOOK: query: describe formatted srcpart partition (ds='2017-01-19', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: statsdb@srcpart -# col_name data_type comment - -key string default -value string default -newcol string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2017-01-19, 11] -Database: statsdb -Table: srcpart -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} -#### A masked pattern was here #### - numFiles 1 - numRows 500 - rawDataSize 5312 - totalSize 5812 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: alter table srcpart partition (ds='2017-01-19', hr='11') update statistics set ('numRows' = '1000') -PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS -PREHOOK: Input: statsdb@srcpart -PREHOOK: Output: statsdb@srcpart@ds=2017-01-19/hr=11 -POSTHOOK: query: alter table srcpart partition (ds='2017-01-19', hr='11') update statistics set ('numRows' = '1000') -POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS -POSTHOOK: Input: statsdb@srcpart -POSTHOOK: Input: statsdb@srcpart@ds=2017-01-19/hr=11 -POSTHOOK: Output: statsdb@srcpart@ds=2017-01-19/hr=11 -PREHOOK: query: describe formatted srcpart partition (ds='2017-01-19', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: statsdb@srcpart -POSTHOOK: query: describe formatted srcpart partition (ds='2017-01-19', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: 
statsdb@srcpart -# col_name data_type comment - -key string default -value string default -newcol string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2017-01-19, 11] -Database: statsdb -Table: srcpart -#### A masked pattern was here #### -Partition Parameters: -#### A masked pattern was here #### - numFiles 1 - numRows 1000 - rawDataSize 5312 - totalSize 5812 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: analyze table srcpart partition (ds='2017-01-19', hr='11') compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: statsdb@srcpart -PREHOOK: Input: statsdb@srcpart@ds=2017-01-19/hr=11 -PREHOOK: Output: statsdb@srcpart -PREHOOK: Output: statsdb@srcpart@ds=2017-01-19/hr=11 -POSTHOOK: query: analyze table srcpart partition (ds='2017-01-19', hr='11') compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: statsdb@srcpart -POSTHOOK: Input: statsdb@srcpart@ds=2017-01-19/hr=11 -POSTHOOK: Output: statsdb@srcpart -POSTHOOK: Output: statsdb@srcpart@ds=2017-01-19/hr=11 -PREHOOK: query: describe formatted srcpart partition (ds='2017-01-19', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: statsdb@srcpart -POSTHOOK: query: describe formatted srcpart partition (ds='2017-01-19', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: statsdb@srcpart -# col_name data_type comment - -key string default -value string default -newcol string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2017-01-19, 11] -Database: statsdb -Table: srcpart -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} -#### A masked pattern was here #### - numFiles 1 - numRows 500 - rawDataSize 5312 - totalSize 5812 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table srcpart -PREHOOK: type: DROPTABLE -PREHOOK: Input: statsdb@srcpart -PREHOOK: Output: statsdb@srcpart -POSTHOOK: query: drop table srcpart -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: statsdb@srcpart -POSTHOOK: Output: statsdb@srcpart diff --git ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out index af21343..6c7d0ee 100644 --- ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out +++ ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out @@ -168,7 +168,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 2 numRows 3 @@ -248,12 +247,44 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1) from calendar POSTHOOK: type: QUERY STAGE DEPENDENCIES: - 
Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: calendar + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator - limit: 1 + limit: -1 Processor Tree: ListSink diff --git ql/src/test/results/clientpositive/constant_prop_3.q.out ql/src/test/results/clientpositive/constant_prop_3.q.out index ecf6e57..066ed07 100644 --- ql/src/test/results/clientpositive/constant_prop_3.q.out +++ ql/src/test/results/clientpositive/constant_prop_3.q.out @@ -88,7 +88,7 @@ POSTHOOK: query: analyze table supplier_hive compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@supplier_hive #### A masked pattern was here #### -Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select p_brand, p_type, @@ -378,34 +378,30 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: s_suppkey (type: int) - outputColumnNames: s_suppkey + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: s_suppkey (type: int) + keys: _col0 (type: int), true (type: boolean) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out index 7d8e5c1..6af696a 100644 --- ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out +++ ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out @@ -74,7 +74,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -121,7 +120,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -175,7 +173,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -222,7 +219,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -268,7 +264,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 diff --git ql/src/test/results/clientpositive/create_like.q.out ql/src/test/results/clientpositive/create_like.q.out index ff2e752..58d9879 100644 --- ql/src/test/results/clientpositive/create_like.q.out +++ ql/src/test/results/clientpositive/create_like.q.out @@ -354,7 +354,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} avro.schema.literal { \"namespace\": \"testing.hive.avro.serde\", \"name\": \"doctors\", diff --git ql/src/test/results/clientpositive/describe_comment_nonascii.q.out ql/src/test/results/clientpositive/describe_comment_nonascii.q.out index 70f234e..de1cca9 100644 --- ql/src/test/results/clientpositive/describe_comment_nonascii.q.out +++ ql/src/test/results/clientpositive/describe_comment_nonascii.q.out @@ -49,7 +49,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 0 numRows 0 diff --git ql/src/test/results/clientpositive/equal_ns.q.out ql/src/test/results/clientpositive/equal_ns.q.out deleted file mode 100644 index 6a38c05..0000000 --- ql/src/test/results/clientpositive/equal_ns.q.out +++ /dev/null @@ -1,29 +0,0 @@ -PREHOOK: query: create table test(x string, y string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test -POSTHOOK: query: create table test(x string, y string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test -PREHOOK: query: insert 
into test values ('q', 'q'), ('q', 'w'), (NULL, 'q'), ('q', NULL), (NULL, NULL) -PREHOOK: type: QUERY -PREHOOK: Output: default@test -POSTHOOK: query: insert into test values ('q', 'q'), ('q', 'w'), (NULL, 'q'), ('q', NULL), (NULL, NULL) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@test -POSTHOOK: Lineage: test.x SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: test.y SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: select *, x<=>y, not (x<=> y), (x <=> y) = false from test -PREHOOK: type: QUERY -PREHOOK: Input: default@test -#### A masked pattern was here #### -POSTHOOK: query: select *, x<=>y, not (x<=> y), (x <=> y) = false from test -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test -#### A masked pattern was here #### -NULL NULL true false false -NULL q false true true -q NULL false true true -q q true false false -q w false true true diff --git ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 1b6bb1f..e2310d7 100644 --- ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -3324,7 +3324,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) @@ -3419,19 +3418,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -3468,19 +3464,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Union 6 Vertex: 
Union 6 @@ -3531,7 +3524,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) @@ -3628,19 +3620,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -3677,19 +3666,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Union 6 Vertex: Union 6 @@ -3742,7 +3728,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: ds (type: string) @@ -3780,7 +3765,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: ds (type: string) @@ -3865,19 +3849,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: 
string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3930,19 +3911,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Union 3 Vertex: Union 3 Union 9 @@ -5687,7 +5665,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) @@ -5748,19 +5725,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -5812,19 +5786,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Union 4 Vertex: Union 4 diff --git ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index cacde93..e89526e 100644 --- 
ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -1509,280 +1509,6 @@ POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### 0 -PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart_date - filterExpr: (key is not null and key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: srcpart_small - filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 
Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart_date -PREHOOK: Input: default@srcpart_date@ds=2008-04-08 -PREHOOK: Input: default@srcpart_date@ds=2008-04-09 -PREHOOK: Input: default@srcpart_small -PREHOOK: Input: default@srcpart_small@ds=2008-04-08 -PREHOOK: Input: default@srcpart_small@ds=2008-04-09 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart_date -POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 -POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 -POSTHOOK: Input: default@srcpart_small -POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 -POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 -#### A masked pattern was here #### -8224 -PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart_date - filterExpr: (key is not null and key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND 
DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: srcpart_small - filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Execution mode: llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - 
ListSink - -PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart_date -PREHOOK: Input: default@srcpart_date@ds=2008-04-08 -PREHOOK: Input: default@srcpart_date@ds=2008-04-09 -PREHOOK: Input: default@srcpart_small -PREHOOK: Input: default@srcpart_small@ds=2008-04-08 -PREHOOK: Input: default@srcpart_small@ds=2008-04-09 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart_date -POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 -POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 -POSTHOOK: Input: default@srcpart_small -POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 -POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 -#### A masked pattern was here #### -8224 PREHOOK: query: drop table srcpart_date PREHOOK: type: DROPTABLE PREHOOK: Input: default@srcpart_date diff --git ql/src/test/results/clientpositive/llap/explainuser_1.q.out ql/src/test/results/clientpositive/llap/explainuser_1.q.out index c3f8071..b9489eb 100644 --- ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -2139,31 +2139,29 @@ Stage-0 limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_14] - Merge Join Operator [MERGEJOIN_19] (rows=166 width=178) - Conds:RS_10._col0=RS_11._col0(Inner),Output:["_col0","_col1"] + File Output Operator [FS_13] + Merge Join Operator [MERGEJOIN_18] (rows=168 width=178) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] + SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=166 width=178) + Select Operator [SEL_1] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_17] (rows=166 width=178) - predicate:(key > '9') - TableScan [TS_0] (rows=500 width=178) - default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + TableScan [TS_0] (rows=500 width=178) + default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] + SHUFFLE [RS_10] PartitionCols:_col0 - Group By Operator [GBY_8] (rows=69 width=87) + Group By Operator [GBY_7] (rows=69 width=87) Output:["_col0"],keys:KEY._col0 <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] + SHUFFLE [RS_6] PartitionCols:_col0 - Group By Operator [GBY_6] (rows=69 width=87) + Group By Operator [GBY_5] (rows=69 width=87) Output:["_col0"],keys:key - Filter Operator [FIL_18] (rows=166 width=87) + Filter Operator [FIL_17] (rows=166 width=87) predicate:(key > '9') - TableScan [TS_3] (rows=500 width=87) + TableScan [TS_2] (rows=500 width=87) default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] PREHOOK: query: explain select p.p_partkey, li.l_suppkey @@ -2310,71 +2308,65 @@ Stage-0 limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_36] - Merge Join Operator [MERGEJOIN_49] (rows=34 width=186) - Conds:RS_32._col2=RS_33._col0(Inner),Output:["_col0","_col1","_col2"] + File Output Operator [FS_33] + Merge Join Operator [MERGEJOIN_46] (rows=34 width=186) + Conds:RS_29._col2=RS_30._col0(Inner),Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_32] + SHUFFLE [RS_29] PartitionCols:_col2 - Filter Operator [FIL_42] (rows=83 width=186) - predicate:_col2 is not null - Group By Operator [GBY_16] (rows=83 width=186) - 
Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] - PartitionCols:_col0, _col1 - Group By Operator [GBY_14] (rows=83 width=186) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 - Merge Join Operator [MERGEJOIN_48] (rows=166 width=178) - Conds:RS_10._col0=RS_11._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_43] (rows=166 width=178) - predicate:(key > '8') - TableScan [TS_0] (rows=500 width=178) - default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - PartitionCols:_col0 - Group By Operator [GBY_8] (rows=69 width=87) - Output:["_col0"],keys:KEY._col0 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0 - Group By Operator [GBY_6] (rows=69 width=87) - Output:["_col0"],keys:key - Filter Operator [FIL_44] (rows=166 width=87) - predicate:(key > '8') - TableScan [TS_3] (rows=500 width=87) - default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + Group By Operator [GBY_15] (rows=84 width=186) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0, _col1 + Group By Operator [GBY_13] (rows=84 width=186) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 + Merge Join Operator [MERGEJOIN_45] (rows=168 width=178) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_9] + PartitionCols:_col0 + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_10] + PartitionCols:_col0 + Group By Operator [GBY_7] (rows=69 width=87) + Output:["_col0"],keys:KEY._col0 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_6] + PartitionCols:_col0 + Group By Operator [GBY_5] (rows=69 width=87) + Output:["_col0"],keys:key + Filter Operator [FIL_41] (rows=166 width=87) + predicate:(key > '8') + TableScan [TS_2] (rows=500 width=87) + default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_33] + SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_30] (rows=34 width=8) + Group By Operator [GBY_27] (rows=34 width=8) Output:["_col0"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_29] + SHUFFLE [RS_26] PartitionCols:_col0 - Group By Operator [GBY_28] (rows=34 width=8) + Group By Operator [GBY_25] (rows=34 width=8) Output:["_col0"],keys:_col1 - Filter Operator [FIL_45] (rows=69 width=8) - predicate:_col1 is not null - Select Operator [SEL_47] (rows=69 width=8) - Output:["_col1"] - Group By Operator [GBY_24] (rows=69 width=95) - Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] - PartitionCols:_col0 - Group By Operator [GBY_22] (rows=69 width=95) - Output:["_col0","_col1"],aggregations:["count()"],keys:key - Filter Operator [FIL_46] (rows=166 width=87) - predicate:(key > '9') - TableScan [TS_19] (rows=500 width=87) - default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + Select Operator [SEL_44] (rows=69 width=8) + Output:["_col1"] + Group By Operator [GBY_22] 
(rows=69 width=95) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:_col0 + Group By Operator [GBY_20] (rows=69 width=95) + Output:["_col0","_col1"],aggregations:["count()"],keys:key + Filter Operator [FIL_43] (rows=166 width=87) + predicate:(key > '9') + TableScan [TS_17] (rows=500 width=87) + default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] PREHOOK: query: explain select p_mfgr, p_name, avg(p_size) from part @@ -2401,48 +2393,44 @@ Stage-0 limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_23] - Merge Join Operator [MERGEJOIN_28] (rows=6 width=227) - Conds:RS_19._col1=RS_20._col0(Inner),Output:["_col0","_col1","_col2"] + File Output Operator [FS_21] + Merge Join Operator [MERGEJOIN_26] (rows=6 width=227) + Conds:RS_17._col1=RS_18._col0(Inner),Output:["_col0","_col1","_col2"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + SHUFFLE [RS_17] PartitionCols:_col1 - Select Operator [SEL_6] (rows=13 width=227) + Select Operator [SEL_5] (rows=13 width=227) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_5] (rows=13 width=227) + Group By Operator [GBY_4] (rows=13 width=227) Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] + SHUFFLE [RS_3] PartitionCols:_col0, _col1 - Group By Operator [GBY_3] (rows=13 width=295) + Group By Operator [GBY_2] (rows=13 width=295) Output:["_col0","_col1","_col2"],aggregations:["avg(p_size)"],keys:p_name, p_mfgr - Filter Operator [FIL_26] (rows=26 width=223) - predicate:p_name is not null - TableScan [TS_0] (rows=26 width=223) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] + TableScan [TS_0] (rows=26 width=223) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] + SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_17] (rows=13 width=184) + Group By Operator [GBY_15] (rows=13 width=184) Output:["_col0"],keys:KEY._col0 <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_16] + SHUFFLE [RS_14] PartitionCols:_col0 - Group By Operator [GBY_15] (rows=13 width=184) + Group By Operator [GBY_13] (rows=13 width=184) Output:["_col0"],keys:_col0 - Select Operator [SEL_11] (rows=26 width=491) + Select Operator [SEL_10] (rows=26 width=491) Output:["_col0"] - Filter Operator [FIL_27] (rows=26 width=491) - predicate:first_value_window_0 is not null - PTF Operator [PTF_10] (rows=26 width=491) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] - Select Operator [SEL_9] (rows=26 width=491) - Output:["_col1","_col2","_col5"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] - PartitionCols:p_mfgr - TableScan [TS_7] (rows=26 width=223) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"] + PTF Operator [PTF_9] (rows=26 width=491) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] + Select Operator [SEL_8] (rows=26 width=491) + Output:["_col1","_col2","_col5"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:p_mfgr + TableScan [TS_6] (rows=26 width=223) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"] PREHOOK: query: explain select * from src_cbo @@ -2472,53 +2460,53 @@ Stage-0 limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_26] - Select Operator 
[SEL_25] (rows=500 width=178) + File Output Operator [FS_27] + Select Operator [SEL_26] (rows=500 width=178) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_24] - Select Operator [SEL_23] (rows=500 width=178) + SHUFFLE [RS_25] + Select Operator [SEL_24] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=500 width=198) + Filter Operator [FIL_23] (rows=500 width=198) predicate:((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) - Merge Join Operator [MERGEJOIN_31] (rows=500 width=198) - Conds:RS_19._col0=RS_20._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_32] (rows=500 width=198) + Conds:RS_20._col0=RS_21._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + SHUFFLE [RS_20] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_30] (rows=500 width=194) + Merge Join Operator [MERGEJOIN_31] (rows=500 width=194) Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_16] + PARTITION_ONLY_SHUFFLE [RS_17] Select Operator [SEL_1] (rows=500 width=178) Output:["_col0","_col1"] TableScan [TS_0] (rows=500 width=178) default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_17] + PARTITION_ONLY_SHUFFLE [RS_18] Group By Operator [GBY_7] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"] <-Map 5 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_6] Group By Operator [GBY_5] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["count()","count(key)"] - Filter Operator [FIL_28] (rows=166 width=87) + Filter Operator [FIL_29] (rows=166 width=87) predicate:(key > '2') TableScan [TS_2] (rows=500 width=87) default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] + SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_15] (rows=69 width=91) - Output:["_col0","_col1"] - Group By Operator [GBY_14] (rows=69 width=87) - Output:["_col0"],keys:KEY._col0 - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_13] - PartitionCols:_col0 - Group By Operator [GBY_12] (rows=69 width=87) - Output:["_col0"],keys:key - Filter Operator [FIL_29] (rows=166 width=87) + Group By Operator [GBY_15] (rows=69 width=91) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0, _col1 + Group By Operator [GBY_13] (rows=69 width=91) + Output:["_col0","_col1"],keys:_col0, true + Select Operator [SEL_11] (rows=166 width=87) + Output:["_col0"] + Filter Operator [FIL_30] (rows=166 width=87) predicate:(key > '2') TableScan [TS_9] (rows=500 width=87) default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] @@ -2735,25 +2723,23 @@ Stage-0 <-Reducer 9 [SIMPLE_EDGE] llap SHUFFLE [RS_30] PartitionCols:_col0 - Select Operator [SEL_25] (rows=1 width=12) - Output:["_col0","_col1"] - Group By Operator [GBY_24] (rows=1 width=8) - Output:["_col0"],keys:KEY._col0 - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] - PartitionCols:_col0 - Group By Operator [GBY_22] (rows=1 width=8) - Output:["_col0"],keys:_col0 - Group By Operator [GBY_19] (rows=1 width=8) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_18] - Group By Operator [GBY_17] (rows=1 width=76) - 
Output:["_col0"],aggregations:["avg(p_size)"] - Filter Operator [FIL_40] (rows=8 width=4) - predicate:(p_size < 10) - TableScan [TS_14] (rows=26 width=4) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] + Group By Operator [GBY_24] (rows=1 width=12) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=1 width=12) + Output:["_col0","_col1"],keys:_col0, true + Group By Operator [GBY_19] (rows=1 width=8) + Output:["_col0"],aggregations:["avg(VALUE._col0)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=76) + Output:["_col0"],aggregations:["avg(p_size)"] + Filter Operator [FIL_40] (rows=8 width=4) + predicate:(p_size < 10) + TableScan [TS_14] (rows=26 width=4) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] PREHOOK: query: explain select b.p_mfgr, min(p_retailprice) from part b diff --git ql/src/test/results/clientpositive/llap/lineage3.q.out ql/src/test/results/clientpositive/llap/lineage3.q.out index f092967..04ae206 100644 --- ql/src/test/results/clientpositive/llap/lineage3.q.out +++ ql/src/test/results/clientpositive/llap/lineage3.q.out @@ -166,7 +166,7 @@ where key in (select key+18 from src1) order by key PREHOOK: type: QUERY PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"tez","database":"default","hash":"8b9d63653e36ecf4dd425d3cc3de9199","queryText":"select key, value from src1\nwhere key in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) + 18.0) is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"8b9d63653e36ecf4dd425d3cc3de9199","queryText":"select key, value from src1\nwhere key in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} 146 val_146 273 val_273 PREHOOK: query: select * from src1 a @@ -186,7 +186,7 @@ where key not in (select key+18 from src1) order by key PREHOOK: type: QUERY PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"tez","database":"default","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by 
key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"((count(*) = 0) or (true is null and src1.key is not null and (count((UDFToDouble(src1.key) + 18.0)) >= count(*))))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"((count(*) = 0) or (i is null and src1.key is not null and (count((UDFToDouble(src1.key) + 18.0)) >= count(*))))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]} PREHOOK: query: select * from src1 a where not exists (select cint from alltypesorc b diff --git ql/src/test/results/clientpositive/llap/mergejoin.q.out ql/src/test/results/clientpositive/llap/mergejoin.q.out index 6114548..4ec2a71 100644 --- ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -92,7 +92,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=25) @@ -321,7 +321,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 5 Map Operator Tree: @@ -341,7 +341,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -378,7 +378,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) @@ -1434,7 +1434,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution 
mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 5 Map Operator Tree: @@ -1453,7 +1453,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -1490,7 +1490,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) @@ -1565,7 +1565,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 4 Map Operator Tree: @@ -1594,7 +1594,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -1631,7 +1631,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=500) @@ -1831,7 +1831,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 6 Map Operator Tree: @@ -1851,7 +1851,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 7 Map Operator Tree: @@ -1937,7 +1937,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) @@ -1949,7 +1949,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 8 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=25) @@ -2034,7 +2034,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 5 Map Operator Tree: @@ -2054,7 +2054,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 51000 Basic stats: 
COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -2091,7 +2091,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) @@ -2224,7 +2224,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 9 Map Operator Tree: @@ -2244,7 +2244,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -2310,7 +2310,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=508) @@ -2380,7 +2380,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 5 Map Operator Tree: @@ -2400,7 +2400,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -2437,7 +2437,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) @@ -2524,7 +2524,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 6 Map Operator Tree: @@ -2544,7 +2544,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 7 Map Operator Tree: @@ -2630,7 +2630,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) @@ -2642,7 +2642,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 
(type: int), _col1 (type: int), _col2 (type: binary) Reducer 8 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=25) @@ -2777,7 +2777,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 9 Map Operator Tree: @@ -2797,7 +2797,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -2863,7 +2863,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=508) @@ -2954,10 +2954,10 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -3016,7 +3016,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) diff --git ql/src/test/results/clientpositive/llap/orc_llap.q.out ql/src/test/results/clientpositive/llap/orc_llap.q.out index 4fb3d12..90055a5 100644 --- ql/src/test/results/clientpositive/llap/orc_llap.q.out +++ ql/src/test/results/clientpositive/llap/orc_llap.q.out @@ -597,7 +597,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint), _col1 (type: smallint), _col2 (type: binary) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 5 Map Operator Tree: @@ -618,7 +618,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: smallint) Statistics: Num rows: 122880 Data size: 29079940 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -660,7 +660,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=122880) @@ -1089,7 +1089,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint), _col1 (type: smallint), _col2 (type: binary) - Execution mode: vectorized, llap + 
Execution mode: llap LLAP IO: all inputs Map 5 Map Operator Tree: @@ -1110,7 +1110,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: smallint) Statistics: Num rows: 245760 Data size: 58159880 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -1152,7 +1152,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=245760) diff --git ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out index e28ed5d..b186b01 100644 --- ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out +++ ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out @@ -57,6 +57,7 @@ PREHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, c char(50), v varchar(50), da date, + ts timestamp, dec decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") @@ -74,17 +75,18 @@ POSTHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, c char(50), v varchar(50), da date, + ts timestamp, dec decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@orc_ppd_staging -PREHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), dec, bin from staging order by t, s +PREHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s PREHOOK: type: QUERY PREHOOK: Input: default@staging PREHOOK: Output: default@orc_ppd_staging -POSTHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), dec, bin from staging order by t, s +POSTHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s POSTHOOK: type: QUERY POSTHOOK: Input: default@staging POSTHOOK: Output: default@orc_ppd_staging @@ -100,12 +102,13 @@ POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [(staging)staging.FieldSchema(name:i POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] POSTHOOK: Lineage: orc_ppd_staging.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_ppd_staging.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] -PREHOOK: query: insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11",-71.54,"aaa" from staging limit 1 +PREHOOK: query: insert into orc_ppd_staging select 
-10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@staging PREHOOK: Output: default@orc_ppd_staging -POSTHOOK: query: insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11",-71.54,"aaa" from staging limit 1 +POSTHOOK: query: insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@staging POSTHOOK: Output: default@orc_ppd_staging @@ -121,12 +124,13 @@ POSTHOOK: Lineage: orc_ppd_staging.i EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] -PREHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11",71.54,"zzz" from staging limit 1 +PREHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@staging PREHOOK: Output: default@orc_ppd_staging -POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11",71.54,"zzz" from staging limit 1 +POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@staging POSTHOOK: Output: default@orc_ppd_staging @@ -142,6 +146,7 @@ POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] PREHOOK: query: CREATE TABLE orc_ppd(t tinyint, si smallint, @@ -154,6 +159,7 @@ PREHOOK: query: CREATE TABLE orc_ppd(t tinyint, c char(50), v varchar(50), da date, + ts timestamp, dec decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") @@ -171,17 +177,18 @@ POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint, c char(50), v varchar(50), da date, + ts timestamp, dec decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@orc_ppd -PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), da, dec, bin from orc_ppd_staging order by t, s +PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd_staging PREHOOK: Output: default@orc_ppd -POSTHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), da, dec, bin from orc_ppd_staging order by t, s +POSTHOOK: query: insert overwrite table 
orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_ppd_staging POSTHOOK: Output: default@orc_ppd @@ -190,20 +197,21 @@ POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSch POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bo, type:boolean, comment:null), ] POSTHOOK: Lineage: orc_ppd.c EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] POSTHOOK: Lineage: orc_ppd.d SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:d, type:double, comment:null), ] -POSTHOOK: Lineage: orc_ppd.da SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:da, type:date, comment:null), ] +POSTHOOK: Lineage: orc_ppd.da EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: orc_ppd.dec SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] POSTHOOK: Lineage: orc_ppd.f SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:f, type:float, comment:null), ] POSTHOOK: Lineage: orc_ppd.i SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:i, type:int, comment:null), ] POSTHOOK: Lineage: orc_ppd.s SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] POSTHOOK: Lineage: orc_ppd.si SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_ppd.t SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: orc_ppd.v EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] PREHOOK: query: select count(*) from orc_ppd PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16677 + HDFS_BYTES_READ: 16711 HDFS_BYTES_WRITTEN: 104 HDFS_READ_OPS: 5 HDFS_LARGE_READ_OPS: 0 @@ -213,7 +221,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2100 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_MISS: 2 NUM_DECODED_BATCHES: 1 @@ -240,7 +248,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 1467 + HDFS_BYTES_READ: 1501 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -250,7 +258,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 8 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 524288 ALLOCATED_USED_BYTES: 269 @@ -277,7 +285,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 22 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -302,7 +310,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 16 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -327,7 +335,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 
RECORDS_IN_Map_1: 2000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 18 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -377,7 +385,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 32 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -402,7 +410,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 32 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -427,7 +435,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 1697 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -452,7 +460,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 12 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -477,7 +485,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 1713 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -502,7 +510,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -527,7 +535,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 50 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -552,7 +560,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 318 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -605,7 +613,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 0 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 2 SELECTED_ROWGROUPS: 0 @@ -625,7 +633,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 0 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 2 SELECTED_ROWGROUPS: 0 @@ -649,7 +657,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 6132 + HDFS_BYTES_READ: 6166 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -659,7 +667,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 786432 ALLOCATED_USED_BYTES: 11299 @@ -687,7 +695,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -712,7 +720,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + 
RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -751,7 +759,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2100 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -776,7 +784,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 0 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 2 SELECTED_ROWGROUPS: 0 @@ -796,7 +804,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -821,7 +829,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -846,7 +854,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -871,7 +879,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 81 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -896,7 +904,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 74 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -921,7 +929,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 12 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -946,7 +954,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 13 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -996,7 +1004,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 7 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -1021,7 +1029,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 0 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 2 SELECTED_ROWGROUPS: 0 @@ -1041,7 +1049,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 0 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 2 SELECTED_ROWGROUPS: 0 @@ -1061,7 +1069,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 0 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 2 SELECTED_ROWGROUPS: 0 @@ -1081,7 +1089,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -1106,7 
+1114,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -1131,7 +1139,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 4229 CACHE_MISS_BYTES: 0 @@ -1156,7 +1164,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 4229 CACHE_MISS_BYTES: 0 @@ -1171,7 +1179,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 5181 + HDFS_BYTES_READ: 5215 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -1181,7 +1189,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 262144 ALLOCATED_USED_BYTES: 8400 @@ -1209,7 +1217,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 4809 CACHE_MISS_BYTES: 0 @@ -1234,7 +1242,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 4809 CACHE_MISS_BYTES: 0 @@ -1259,7 +1267,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 4809 CACHE_MISS_BYTES: 0 @@ -1278,16 +1286,39 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@alltypesorc PREHOOK: Output: database:default PREHOOK: Output: default@tmp_orcppd -Stage-1 LLAP IO COUNTERS: - SELECTED_ROWGROUPS: 3 +POSTHOOK: query: create temporary table tmp_orcppd + stored as orc + as select ctinyint, csmallint, cint , cbigint, cfloat, cdouble, + cstring1, cstring2, ctimestamp1, ctimestamp2 + from alltypesorc limit 20 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_orcppd PREHOOK: query: insert into table tmp_orcppd values(null, null, null, null, null, null, null, null, null, null) PREHOOK: type: QUERY PREHOOK: Output: default@tmp_orcppd -Stage-1 LLAP IO COUNTERS: +POSTHOOK: query: insert into table tmp_orcppd + values(null, null, null, null, null, + null, null, null, null, null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tmp_orcppd +POSTHOOK: Lineage: tmp_orcppd.cbigint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: tmp_orcppd.cdouble EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: tmp_orcppd.cfloat EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: tmp_orcppd.cint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] 
+POSTHOOK: Lineage: tmp_orcppd.csmallint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tmp_orcppd.cstring1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: tmp_orcppd.cstring2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: tmp_orcppd.ctimestamp1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col9, type:string, comment:), ] +POSTHOOK: Lineage: tmp_orcppd.ctimestamp2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col10, type:string, comment:), ] +POSTHOOK: Lineage: tmp_orcppd.ctinyint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] PREHOOK: query: drop table if exists tbl_orcppd_1_1 PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists tbl_orcppd_1_1 +POSTHOOK: type: DROPTABLE PREHOOK: query: create table tbl_orcppd_1_1 as select count(*) from tmp_orcppd where ctimestamp1> current_timestamp() and @@ -1298,10 +1329,25 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@tmp_orcppd PREHOOK: Output: database:default PREHOOK: Output: default@tbl_orcppd_1_1 +POSTHOOK: query: create table tbl_orcppd_1_1 as + select count(*) from tmp_orcppd + where ctimestamp1> current_timestamp() and + ctimestamp2 > current_timestamp() and + cstring1 like 'a*' and + cstring2 like 'a*' +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tmp_orcppd +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl_orcppd_1_1 +POSTHOOK: Lineage: tbl_orcppd_1_1._c0 EXPRESSION [(tmp_orcppd)tmp_orcppd.null, ] PREHOOK: query: drop table if exists tmp_orcppd PREHOOK: type: DROPTABLE PREHOOK: Input: default@tmp_orcppd PREHOOK: Output: default@tmp_orcppd +POSTHOOK: query: drop table if exists tmp_orcppd +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tmp_orcppd +POSTHOOK: Output: default@tmp_orcppd PREHOOK: query: create temporary table tmp_orcppd stored as orc as select ctimestamp1, ctimestamp2 @@ -1310,15 +1356,28 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@alltypesorc PREHOOK: Output: database:default PREHOOK: Output: default@tmp_orcppd -Stage-1 LLAP IO COUNTERS: - SELECTED_ROWGROUPS: 3 +POSTHOOK: query: create temporary table tmp_orcppd + stored as orc + as select ctimestamp1, ctimestamp2 + from alltypesorc limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_orcppd PREHOOK: query: insert into table tmp_orcppd values(null, null) PREHOOK: type: QUERY PREHOOK: Output: default@tmp_orcppd -Stage-1 LLAP IO COUNTERS: +POSTHOOK: query: insert into table tmp_orcppd + values(null, null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tmp_orcppd +POSTHOOK: Lineage: tmp_orcppd.ctimestamp1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: tmp_orcppd.ctimestamp2 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] PREHOOK: query: drop table if exists tbl_orcppd_2_1 PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists tbl_orcppd_2_1 +POSTHOOK: type: DROPTABLE PREHOOK: query: create table tbl_orcppd_2_1 as select 
count(*) from tmp_orcppd where ctimestamp1 in (cast('2065-08-13 19:03:52' as timestamp), cast('2071-01-16 20:21:17' as timestamp), current_timestamp()) @@ -1326,34 +1385,68 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@tmp_orcppd PREHOOK: Output: database:default PREHOOK: Output: default@tbl_orcppd_2_1 +POSTHOOK: query: create table tbl_orcppd_2_1 as + select count(*) from tmp_orcppd + where ctimestamp1 in (cast('2065-08-13 19:03:52' as timestamp), cast('2071-01-16 20:21:17' as timestamp), current_timestamp()) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tmp_orcppd +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl_orcppd_2_1 +POSTHOOK: Lineage: tbl_orcppd_2_1._c0 EXPRESSION [(tmp_orcppd)tmp_orcppd.null, ] PREHOOK: query: drop table if exists tmp_orcppd PREHOOK: type: DROPTABLE PREHOOK: Input: default@tmp_orcppd PREHOOK: Output: default@tmp_orcppd +POSTHOOK: query: drop table if exists tmp_orcppd +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tmp_orcppd +POSTHOOK: Output: default@tmp_orcppd PREHOOK: query: create temporary table tmp_orcppd stored as orc - as select ts, cast(ts as date) - from staging + as select ts, da + from orc_ppd_staging PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@staging +PREHOOK: Input: default@orc_ppd_staging PREHOOK: Output: database:default PREHOOK: Output: default@tmp_orcppd -Stage-1 LLAP IO COUNTERS: +POSTHOOK: query: create temporary table tmp_orcppd + stored as orc + as select ts, da + from orc_ppd_staging +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@orc_ppd_staging +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_orcppd PREHOOK: query: insert into table tmp_orcppd values(null, null) PREHOOK: type: QUERY PREHOOK: Output: default@tmp_orcppd -Stage-1 LLAP IO COUNTERS: +POSTHOOK: query: insert into table tmp_orcppd + values(null, null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tmp_orcppd +POSTHOOK: Lineage: tmp_orcppd.da EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tmp_orcppd.ts EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] PREHOOK: query: drop table if exists tbl_orcppd_3_1 PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists tbl_orcppd_3_1 +POSTHOOK: type: DROPTABLE PREHOOK: query: create table tbl_orcppd_3_1 as select count(*) from tmp_orcppd - group by ts, cast(ts as date) + group by ts, da having ts in (select ctimestamp1 from alltypesorc limit 10) PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@alltypesorc PREHOOK: Input: default@tmp_orcppd PREHOOK: Output: database:default PREHOOK: Output: default@tbl_orcppd_3_1 -Stage-1 LLAP IO COUNTERS: - SELECTED_ROWGROUPS: 4 +POSTHOOK: query: create table tbl_orcppd_3_1 as + select count(*) from tmp_orcppd + group by ts, da + having ts in (select ctimestamp1 from alltypesorc limit 10) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@tmp_orcppd +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl_orcppd_3_1 +POSTHOOK: Lineage: tbl_orcppd_3_1._c0 EXPRESSION [(tmp_orcppd)tmp_orcppd.null, ] diff --git ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out index 4cc8984..d73b583 100644 --- ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out +++ 
ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out @@ -57,6 +57,7 @@ PREHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, c char(50), v varchar(50), da date, + ts timestamp, dec decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") @@ -74,17 +75,18 @@ POSTHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, c char(50), v varchar(50), da date, + ts timestamp, dec decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@orc_ppd_staging -PREHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), dec, bin from staging order by t, s +PREHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s PREHOOK: type: QUERY PREHOOK: Input: default@staging PREHOOK: Output: default@orc_ppd_staging -POSTHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), dec, bin from staging order by t, s +POSTHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s POSTHOOK: type: QUERY POSTHOOK: Input: default@staging POSTHOOK: Output: default@orc_ppd_staging @@ -100,12 +102,13 @@ POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [(staging)staging.FieldSchema(name:i POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] POSTHOOK: Lineage: orc_ppd_staging.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_ppd_staging.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] -PREHOOK: query: insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11",-71.54,"aaa" from staging limit 1 +PREHOOK: query: insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@staging PREHOOK: Output: default@orc_ppd_staging -POSTHOOK: query: insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11",-71.54,"aaa" from staging limit 1 +POSTHOOK: query: insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@staging POSTHOOK: Output: default@orc_ppd_staging @@ -121,12 +124,13 @@ POSTHOOK: Lineage: orc_ppd_staging.i EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] -PREHOOK: 
query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11",71.54,"zzz" from staging limit 1 +PREHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@staging PREHOOK: Output: default@orc_ppd_staging -POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11",71.54,"zzz" from staging limit 1 +POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@staging POSTHOOK: Output: default@orc_ppd_staging @@ -142,6 +146,7 @@ POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] PREHOOK: query: CREATE TABLE orc_ppd(t tinyint, si smallint, @@ -154,6 +159,7 @@ PREHOOK: query: CREATE TABLE orc_ppd(t tinyint, c char(50), v varchar(50), da date, + ts timestamp, dec decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") @@ -171,17 +177,18 @@ POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint, c char(50), v varchar(50), da date, + ts timestamp, dec decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@orc_ppd -PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), da, dec, bin from orc_ppd_staging order by t, s +PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd_staging PREHOOK: Output: default@orc_ppd -POSTHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), da, dec, bin from orc_ppd_staging order by t, s +POSTHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_ppd_staging POSTHOOK: Output: default@orc_ppd @@ -190,20 +197,21 @@ POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSch POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bo, type:boolean, comment:null), ] POSTHOOK: Lineage: orc_ppd.c EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] POSTHOOK: Lineage: orc_ppd.d SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:d, type:double, comment:null), ] -POSTHOOK: Lineage: orc_ppd.da SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:da, type:date, comment:null), ] +POSTHOOK: Lineage: orc_ppd.da EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: orc_ppd.dec SIMPLE 
[(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] POSTHOOK: Lineage: orc_ppd.f SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:f, type:float, comment:null), ] POSTHOOK: Lineage: orc_ppd.i SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:i, type:int, comment:null), ] POSTHOOK: Lineage: orc_ppd.s SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] POSTHOOK: Lineage: orc_ppd.si SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_ppd.t SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: orc_ppd.v EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] PREHOOK: query: select count(*) from orc_ppd where t > 127 PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 17012 + HDFS_BYTES_READ: 17046 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 6 HDFS_LARGE_READ_OPS: 0 @@ -213,7 +221,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 524288 ALLOCATED_USED_BYTES: 269 @@ -253,7 +261,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 8 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -278,7 +286,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 8 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -303,7 +311,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 18 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -328,7 +336,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 18 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 249 CACHE_MISS_BYTES: 0 @@ -347,7 +355,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16902 + HDFS_BYTES_READ: 16936 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -357,7 +365,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 0 @@ -380,7 +388,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16902 + HDFS_BYTES_READ: 16936 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -390,7 +398,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 8 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 8 @@ -399,7 +407,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM 
COUNTERS: - HDFS_BYTES_READ: 17875 + HDFS_BYTES_READ: 17909 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -409,7 +417,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 8 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 8 @@ -418,7 +426,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16902 + HDFS_BYTES_READ: 16936 HDFS_BYTES_WRITTEN: 102 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -428,7 +436,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 18 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 18 @@ -437,7 +445,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 17875 + HDFS_BYTES_READ: 17909 HDFS_BYTES_WRITTEN: 102 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -447,7 +455,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 18 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 18 @@ -460,7 +468,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16902 + HDFS_BYTES_READ: 16936 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -470,7 +478,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 0 @@ -493,7 +501,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16902 + HDFS_BYTES_READ: 16936 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -503,7 +511,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 8 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 8 @@ -512,7 +520,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 17875 + HDFS_BYTES_READ: 17909 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -522,7 +530,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 8 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 8 @@ -531,7 +539,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16902 + HDFS_BYTES_READ: 16936 HDFS_BYTES_WRITTEN: 102 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -541,7 +549,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 18 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 18 @@ -550,7 +558,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 17875 + HDFS_BYTES_READ: 17909 HDFS_BYTES_WRITTEN: 102 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -560,7 +568,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 
RECORDS_IN_Map_1: 2000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 18 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 18 @@ -573,7 +581,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16902 + HDFS_BYTES_READ: 16936 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -583,7 +591,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 0 @@ -606,7 +614,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16902 + HDFS_BYTES_READ: 16936 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -616,7 +624,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 8 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 8 @@ -625,7 +633,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 17875 + HDFS_BYTES_READ: 17909 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -635,7 +643,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 8 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 8 @@ -644,7 +652,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16902 + HDFS_BYTES_READ: 16936 HDFS_BYTES_WRITTEN: 102 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -654,7 +662,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 18 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 18 @@ -663,7 +671,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 17875 + HDFS_BYTES_READ: 17909 HDFS_BYTES_WRITTEN: 102 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -673,7 +681,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 18 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 18 @@ -686,7 +694,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16902 + HDFS_BYTES_READ: 16936 HDFS_BYTES_WRITTEN: 104 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -696,7 +704,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 1566 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 1566 @@ -705,7 +713,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 17875 + HDFS_BYTES_READ: 17909 HDFS_BYTES_WRITTEN: 104 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -715,7 +723,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 1566 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 1566 @@ -724,7 
+732,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16902 + HDFS_BYTES_READ: 16936 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -734,7 +742,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 8 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 8 @@ -743,7 +751,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 17875 + HDFS_BYTES_READ: 17909 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -753,7 +761,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 8 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 8 @@ -762,7 +770,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16902 + HDFS_BYTES_READ: 16936 HDFS_BYTES_WRITTEN: 102 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -772,7 +780,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 18 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 18 @@ -781,7 +789,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 17875 + HDFS_BYTES_READ: 17909 HDFS_BYTES_WRITTEN: 102 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -791,7 +799,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 18 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 18 @@ -800,7 +808,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 5181 + HDFS_BYTES_READ: 5215 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -810,7 +818,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 262144 ALLOCATED_USED_BYTES: 8400 @@ -838,7 +846,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 4809 CACHE_MISS_BYTES: 0 @@ -857,7 +865,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 21462 + HDFS_BYTES_READ: 21496 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -867,7 +875,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 2 @@ -876,7 +884,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 23522 + HDFS_BYTES_READ: 23556 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -886,7 +894,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + 
RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 2 @@ -899,7 +907,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 21462 + HDFS_BYTES_READ: 21496 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -909,7 +917,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 2 @@ -918,7 +926,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 23522 + HDFS_BYTES_READ: 23556 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -928,7 +936,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 2 @@ -937,7 +945,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 4368 + HDFS_BYTES_READ: 4402 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -947,7 +955,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 786432 ALLOCATED_USED_BYTES: 11299 @@ -975,7 +983,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -994,7 +1002,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 20633 + HDFS_BYTES_READ: 20667 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -1004,7 +1012,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 6 @@ -1013,7 +1021,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 22540 + HDFS_BYTES_READ: 22574 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -1023,7 +1031,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 6 @@ -1036,7 +1044,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 20633 + HDFS_BYTES_READ: 20667 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -1046,7 +1054,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 6 @@ -1055,7 +1063,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 22540 + HDFS_BYTES_READ: 22574 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -1065,7 +1073,7 @@ Stage-1 HIVE COUNTERS: 
DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 6 @@ -1078,7 +1086,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 20633 + HDFS_BYTES_READ: 20667 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -1088,7 +1096,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 6 @@ -1097,7 +1105,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 22540 + HDFS_BYTES_READ: 22574 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -1107,7 +1115,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 6 @@ -1130,7 +1138,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -1155,7 +1163,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 6 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3980 CACHE_MISS_BYTES: 0 @@ -1174,7 +1182,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 18751 + HDFS_BYTES_READ: 18785 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -1184,7 +1192,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 4 @@ -1193,7 +1201,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 18751 + HDFS_BYTES_READ: 18785 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -1203,7 +1211,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 4 @@ -1212,7 +1220,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 20222 + HDFS_BYTES_READ: 20256 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -1222,7 +1230,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 1000 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO COUNTERS: METADATA_CACHE_HIT: 1 4 @@ -1231,7 +1239,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd #### A masked pattern was here #### Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 20222 + HDFS_BYTES_READ: 20256 HDFS_BYTES_WRITTEN: 101 HDFS_READ_OPS: 4 HDFS_LARGE_READ_OPS: 0 @@ -1241,7 +1249,7 @@ Stage-1 HIVE COUNTERS: DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 2100 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 Stage-1 LLAP IO 
COUNTERS: METADATA_CACHE_HIT: 1 4 diff --git ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out index 4bba265..133d6c0 100644 --- ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out @@ -133,11 +133,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6037 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6037 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) mode: hash @@ -191,11 +191,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6037 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6037 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) mode: hash @@ -321,14 +321,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6037 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 670 Data size: 2680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 670 Data size: 2680 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) mode: hash @@ -389,14 +389,14 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6037 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 670 Data size: 2680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 670 Data size: 2680 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) mode: hash @@ -589,18 +589,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 232 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 29 Data 
size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -609,10 +609,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -655,18 +655,18 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 232 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -675,10 +675,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -786,18 +786,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 208 Data size: 24150 Basic stats: COMPLETE Column stats: 
NONE Filter Operator predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -808,13 +808,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -865,18 +865,18 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 208 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -887,13 +887,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, 
_col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1010,18 +1010,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 208 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1032,14 +1032,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1048,13 +1048,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE File Output 
Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1110,18 +1110,18 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 208 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1132,14 +1132,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1148,13 +1148,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_table.q.out index 7b420e2..5745a7e 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_table.q.out @@ -70,9 +70,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=99) + Select Operator [SEL_1] (rows=5 width=99) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=99) + TableScan [TS_0] (rows=5 width=99) default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] PREHOOK: query: select insert_num,a,b from table_add_int_permute_select @@ -183,9 +183,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=114) + Select Operator [SEL_1] (rows=5 width=99) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=114) + TableScan [TS_0] (rows=5 width=99) default@table_add_int_string_permute_select,table_add_int_string_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] PREHOOK: query: select insert_num,a,b from table_add_int_string_permute_select @@ -358,9 +358,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=370) + Select Operator [SEL_1] (rows=5 width=422) Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_0] (rows=6 width=370) + TableScan [TS_0] (rows=5 width=422) default@table_change_string_group_double,table_change_string_group_double,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","b"] PREHOOK: query: select insert_num,c1,c2,c3,b from table_change_string_group_double @@ -602,9 +602,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=479) + Select Operator [SEL_1] (rows=5 width=164) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"] - TableScan [TS_0] (rows=6 width=479) + TableScan [TS_0] (rows=5 width=164) default@table_change_numeric_group_string_group_multi_ints_string_group,table_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group @@ -757,9 +757,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=752) + Select Operator [SEL_1] (rows=5 width=588) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - TableScan [TS_0] (rows=6 width=752) + TableScan [TS_0] (rows=5 width=588) default@table_change_numeric_group_string_group_floating_string_group,table_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] PREHOOK: query: select 
insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out index ecc4ee6..87dd01c 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out @@ -70,9 +70,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=99) + Select Operator [SEL_3] (rows=5 width=99) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=99) + TableScan [TS_0] (rows=5 width=99) default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] PREHOOK: query: select insert_num,a,b from table_add_int_permute_select @@ -183,9 +183,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=114) + Select Operator [SEL_3] (rows=5 width=99) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=114) + TableScan [TS_0] (rows=5 width=99) default@table_add_int_string_permute_select,table_add_int_string_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] PREHOOK: query: select insert_num,a,b from table_add_int_string_permute_select @@ -358,9 +358,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=370) + Select Operator [SEL_3] (rows=5 width=422) Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_0] (rows=6 width=370) + TableScan [TS_0] (rows=5 width=422) default@table_change_string_group_double,table_change_string_group_double,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","b"] PREHOOK: query: select insert_num,c1,c2,c3,b from table_change_string_group_double @@ -602,9 +602,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=479) + Select Operator [SEL_3] (rows=5 width=164) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"] - TableScan [TS_0] (rows=6 width=479) + TableScan [TS_0] (rows=5 width=164) default@table_change_numeric_group_string_group_multi_ints_string_group,table_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group @@ -757,9 +757,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=752) + Select Operator [SEL_3] (rows=5 width=588) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - TableScan [TS_0] (rows=6 width=752) + TableScan [TS_0] (rows=5 width=588) default@table_change_numeric_group_string_group_floating_string_group,table_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] PREHOOK: 
query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_table.q.out index da99110..afeb41d 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_table.q.out @@ -70,9 +70,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=20) + Select Operator [SEL_1] (rows=5 width=20) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=20) + TableScan [TS_0] (rows=5 width=20) default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] PREHOOK: query: select insert_num,a,b from table_add_int_permute_select @@ -183,9 +183,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=21) + Select Operator [SEL_1] (rows=5 width=20) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=21) + TableScan [TS_0] (rows=5 width=20) default@table_add_int_string_permute_select,table_add_int_string_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] PREHOOK: query: select insert_num,a,b from table_add_int_string_permute_select @@ -358,9 +358,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=80) + Select Operator [SEL_1] (rows=5 width=90) Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_0] (rows=6 width=80) + TableScan [TS_0] (rows=5 width=90) default@table_change_string_group_double,table_change_string_group_double,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","b"] PREHOOK: query: select insert_num,c1,c2,c3,b from table_change_string_group_double @@ -602,9 +602,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=178) + Select Operator [SEL_1] (rows=5 width=151) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"] - TableScan [TS_0] (rows=6 width=178) + TableScan [TS_0] (rows=5 width=151) default@table_change_numeric_group_string_group_multi_ints_string_group,table_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group @@ -757,9 +757,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_2] - Select Operator [SEL_1] (rows=6 width=249) + Select Operator [SEL_1] (rows=5 width=250) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - TableScan [TS_0] (rows=6 width=249) + TableScan [TS_0] (rows=5 width=250) default@table_change_numeric_group_string_group_floating_string_group,table_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] PREHOOK: query: select 
insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out index 1fe9a13..6632bfb 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out @@ -1,9 +1,23 @@ -PREHOOK: query: CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string) +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Partitioned +-- NOTE: the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the vector SERDE methods. +-- + +CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string) row format delimited fields terminated by '|' stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@schema_evolution_data -POSTHOOK: query: CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string) +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Partitioned +-- NOTE: the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the vector SERDE methods. 
+-- + +CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string) row format delimited fields terminated by '|' stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default @@ -16,11 +30,25 @@ POSTHOOK: query: load data local inpath '../../data/files/schema_evolution/schem POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@schema_evolution_data -PREHOOK: query: CREATE TABLE part_add_int_permute_select(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) +PREHOOK: query: ------------------------------------------------------------------------------------------ +-- SECTION: ALTER TABLE ADD COLUMNS +-- +-- +-- SUBSECTION: ALTER TABLE ADD COLUMNS: INT PERMUTE SELECT +-- +-- +CREATE TABLE part_add_int_permute_select(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@part_add_int_permute_select -POSTHOOK: query: CREATE TABLE part_add_int_permute_select(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) +POSTHOOK: query: ------------------------------------------------------------------------------------------ +-- SECTION: ALTER TABLE ADD COLUMNS +-- +-- +-- SUBSECTION: ALTER TABLE ADD COLUMNS: INT PERMUTE SELECT +-- +-- +CREATE TABLE part_add_int_permute_select(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_add_int_permute_select @@ -34,11 +62,13 @@ POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).a EXPRESSION [( POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 -PREHOOK: query: alter table part_add_int_permute_select add columns(c int) +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table part_add_int_permute_select add columns(c int) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@part_add_int_permute_select PREHOOK: Output: default@part_add_int_permute_select -POSTHOOK: query: alter table part_add_int_permute_select add columns(c int) +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table part_add_int_permute_select add columns(c int) POSTHOOK: type: ALTERTABLE_ADDCOLS POSTHOOK: Input: default@part_add_int_permute_select POSTHOOK: Output: default@part_add_int_permute_select @@ -73,12 +103,14 @@ Stage-0 TableScan [TS_0] (rows=2 width=16) default@part_add_int_permute_select,part_add_int_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] -PREHOOK: query: select insert_num,part,a,b from part_add_int_permute_select +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select insert_num,part,a,b from part_add_int_permute_select PREHOOK: type: QUERY PREHOOK: Input: default@part_add_int_permute_select PREHOOK: Input: default@part_add_int_permute_select@part=1 #### A masked pattern was here #### -POSTHOOK: query: select insert_num,part,a,b from part_add_int_permute_select +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select insert_num,part,a,b from part_add_int_permute_select POSTHOOK: type: QUERY POSTHOOK: Input: default@part_add_int_permute_select POSTHOOK: Input: default@part_add_int_permute_select@part=1 @@ -120,11 +152,17 @@ POSTHOOK: query: drop table part_add_int_permute_select POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@part_add_int_permute_select POSTHOOK: Output: default@part_add_int_permute_select -PREHOOK: query: CREATE TABLE part_add_int_string_permute_select(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) +PREHOOK: query: -- SUBSECTION: ALTER TABLE ADD COLUMNS: INT, STRING, PERMUTE SELECT +-- +-- +CREATE TABLE part_add_int_string_permute_select(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@part_add_int_string_permute_select -POSTHOOK: query: CREATE TABLE part_add_int_string_permute_select(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) +POSTHOOK: query: -- SUBSECTION: ALTER TABLE ADD COLUMNS: INT, STRING, PERMUTE SELECT +-- +-- +CREATE TABLE part_add_int_string_permute_select(insert_num int, a INT, b STRING) PARTITIONED BY(part INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_add_int_string_permute_select @@ -138,11 +176,13 @@ POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).a EXPRES POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 -PREHOOK: query: alter table part_add_int_string_permute_select add columns(c int, d string) +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table part_add_int_string_permute_select add columns(c int, d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@part_add_int_string_permute_select PREHOOK: Output: default@part_add_int_string_permute_select -POSTHOOK: query: alter table part_add_int_string_permute_select add columns(c int, d string) +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table part_add_int_string_permute_select add columns(c int, d string) POSTHOOK: type: ALTERTABLE_ADDCOLS POSTHOOK: Input: default@part_add_int_string_permute_select POSTHOOK: Output: default@part_add_int_string_permute_select @@ -178,12 +218,14 @@ Stage-0 TableScan [TS_0] (rows=2 width=19) default@part_add_int_string_permute_select,part_add_int_string_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] -PREHOOK: query: select insert_num,part,a,b from part_add_int_string_permute_select +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select insert_num,part,a,b from part_add_int_string_permute_select PREHOOK: type: QUERY PREHOOK: Input: default@part_add_int_string_permute_select PREHOOK: Input: default@part_add_int_string_permute_select@part=1 #### A masked pattern was here #### -POSTHOOK: query: select insert_num,part,a,b from part_add_int_string_permute_select +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select insert_num,part,a,b from part_add_int_string_permute_select POSTHOOK: type: QUERY POSTHOOK: Input: default@part_add_int_string_permute_select POSTHOOK: Input: default@part_add_int_string_permute_select@part=1 @@ -277,11 +319,25 @@ POSTHOOK: query: drop table part_add_int_string_permute_select POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@part_add_int_string_permute_select POSTHOOK: Output: default@part_add_int_string_permute_select -PREHOOK: query: CREATE TABLE part_change_string_group_double(insert_num int, c1 STRING, c2 CHAR(50), c3 VARCHAR(50), b STRING) PARTITIONED BY(part INT) +PREHOOK: query: ------------------------------------------------------------------------------------------ +-- SECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> DOUBLE +-- +-- +-- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> DOUBLE: (STRING, CHAR, VARCHAR) +-- +CREATE TABLE part_change_string_group_double(insert_num int, c1 STRING, c2 CHAR(50), c3 VARCHAR(50), b STRING) PARTITIONED BY(part INT) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@part_change_string_group_double -POSTHOOK: query: CREATE TABLE part_change_string_group_double(insert_num int, c1 STRING, c2 CHAR(50), c3 VARCHAR(50), b STRING) PARTITIONED BY(part INT) +POSTHOOK: query: ------------------------------------------------------------------------------------------ +-- SECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> DOUBLE +-- +-- +-- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> DOUBLE: (STRING, CHAR, VARCHAR) +-- +CREATE TABLE part_change_string_group_double(insert_num int, c1 STRING, c2 CHAR(50), c3 VARCHAR(50), b STRING) PARTITIONED BY(part INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_change_string_group_double @@ -299,11 +355,13 @@ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c2 EXPRESSI POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c3 EXPRESSION [(schema_evolution_data)schema_evolution_data.FieldSchema(name:double_str, type:string, comment:null), ] POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: alter table part_change_string_group_double replace columns (insert_num int, c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, b STRING) +PREHOOK: query: -- Table-Non-Cascade 
CHANGE COLUMNS ... +alter table part_change_string_group_double replace columns (insert_num int, c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, b STRING) PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: default@part_change_string_group_double PREHOOK: Output: default@part_change_string_group_double -POSTHOOK: query: alter table part_change_string_group_double replace columns (insert_num int, c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, b STRING) +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table part_change_string_group_double replace columns (insert_num int, c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, b STRING) POSTHOOK: type: ALTERTABLE_REPLACECOLS POSTHOOK: Input: default@part_change_string_group_double POSTHOOK: Output: default@part_change_string_group_double @@ -338,7 +396,7 @@ Stage-0 File Output Operator [FS_4] Select Operator [SEL_3] (rows=5 width=4) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_0] (rows=5 width=94) + TableScan [TS_0] (rows=5 width=56) default@part_change_string_group_double,part_change_string_group_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_string_group_double @@ -365,11 +423,25 @@ POSTHOOK: query: drop table part_change_string_group_double POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@part_change_string_group_double POSTHOOK: Output: default@part_change_string_group_double -PREHOOK: query: CREATE TABLE part_change_date_group_string_group_date_timestamp(insert_num int, c1 DATE, c2 DATE, c3 DATE, c4 DATE, c5 DATE, c6 TIMESTAMP, c7 TIMESTAMP, c8 TIMESTAMP, c9 TIMESTAMP, c10 TIMESTAMP, b STRING) PARTITIONED BY(part INT) +PREHOOK: query: ------------------------------------------------------------------------------------------ +-- SECTION: ALTER TABLE CHANGE COLUMNS for DATE_GROUP -> STRING_GROUP +-- +-- +-- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for DATE_GROUP -> STRING_GROUP: DATE,TIMESTAMP, (STRING, CHAR, CHAR trunc, VARCHAR, VARCHAR trunc) +-- +CREATE TABLE part_change_date_group_string_group_date_timestamp(insert_num int, c1 DATE, c2 DATE, c3 DATE, c4 DATE, c5 DATE, c6 TIMESTAMP, c7 TIMESTAMP, c8 TIMESTAMP, c9 TIMESTAMP, c10 TIMESTAMP, b STRING) PARTITIONED BY(part INT) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@part_change_date_group_string_group_date_timestamp -POSTHOOK: query: CREATE TABLE part_change_date_group_string_group_date_timestamp(insert_num int, c1 DATE, c2 DATE, c3 DATE, c4 DATE, c5 DATE, c6 TIMESTAMP, c7 TIMESTAMP, c8 TIMESTAMP, c9 TIMESTAMP, c10 TIMESTAMP, b STRING) PARTITIONED BY(part INT) +POSTHOOK: query: ------------------------------------------------------------------------------------------ +-- SECTION: ALTER TABLE CHANGE COLUMNS for DATE_GROUP -> STRING_GROUP +-- +-- +-- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for DATE_GROUP -> STRING_GROUP: DATE,TIMESTAMP, (STRING, CHAR, CHAR trunc, VARCHAR, VARCHAR trunc) +-- +CREATE TABLE part_change_date_group_string_group_date_timestamp(insert_num int, c1 DATE, c2 DATE, c3 DATE, c4 DATE, c5 DATE, c6 TIMESTAMP, c7 TIMESTAMP, c8 TIMESTAMP, c9 TIMESTAMP, c10 TIMESTAMP, b STRING) PARTITIONED BY(part INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_change_date_group_string_group_date_timestamp @@ -394,11 +466,13 @@ POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION( POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).c9 SIMPLE 
[(schema_evolution_data)schema_evolution_data.FieldSchema(name:timestamp1, type:timestamp, comment:null), ] POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num date1 date1 date1 date1 date1 timestamp1 timestamp1 timestamp1 timestamp1 timestamp1 _c11 -PREHOOK: query: alter table part_change_date_group_string_group_date_timestamp replace columns(insert_num int, c1 STRING, c2 CHAR(50), c3 CHAR(15), c4 VARCHAR(50), c5 VARCHAR(15), c6 STRING, c7 CHAR(50), c8 CHAR(15), c9 VARCHAR(50), c10 VARCHAR(15), b STRING) +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table part_change_date_group_string_group_date_timestamp replace columns(insert_num int, c1 STRING, c2 CHAR(50), c3 CHAR(15), c4 VARCHAR(50), c5 VARCHAR(15), c6 STRING, c7 CHAR(50), c8 CHAR(15), c9 VARCHAR(50), c10 VARCHAR(15), b STRING) PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: default@part_change_date_group_string_group_date_timestamp PREHOOK: Output: default@part_change_date_group_string_group_date_timestamp -POSTHOOK: query: alter table part_change_date_group_string_group_date_timestamp replace columns(insert_num int, c1 STRING, c2 CHAR(50), c3 CHAR(15), c4 VARCHAR(50), c5 VARCHAR(15), c6 STRING, c7 CHAR(50), c8 CHAR(15), c9 VARCHAR(50), c10 VARCHAR(15), b STRING) +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table part_change_date_group_string_group_date_timestamp replace columns(insert_num int, c1 STRING, c2 CHAR(50), c3 CHAR(15), c4 VARCHAR(50), c5 VARCHAR(15), c6 STRING, c7 CHAR(50), c8 CHAR(15), c9 VARCHAR(50), c10 VARCHAR(15), b STRING) POSTHOOK: type: ALTERTABLE_REPLACECOLS POSTHOOK: Input: default@part_change_date_group_string_group_date_timestamp POSTHOOK: Output: default@part_change_date_group_string_group_date_timestamp @@ -438,7 +512,7 @@ Stage-0 File Output Operator [FS_4] Select Operator [SEL_3] (rows=6 width=4) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=172) + TableScan [TS_0] (rows=6 width=154) default@part_change_date_group_string_group_date_timestamp,part_change_date_group_string_group_date_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp @@ -466,7 +540,18 @@ POSTHOOK: query: drop table part_change_date_group_string_group_date_timestamp POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@part_change_date_group_string_group_date_timestamp POSTHOOK: Output: default@part_change_date_group_string_group_date_timestamp -PREHOOK: query: CREATE TABLE part_change_numeric_group_string_group_multi_ints_string_group(insert_num int, +PREHOOK: query: ------------------------------------------------------------------------------------------ +-- SECTION: ALTER TABLE CHANGE COLUMNS for NUMERIC_GROUP -> STRING_GROUP +-- +-- +-- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for NUMERIC_GROUP -> STRING_GROUP: +-- (TINYINT, SMALLINT, INT, BIGINT), STRING and +-- (TINYINT, SMALLINT, INT, BIGINT), CHAR and CHAR trunc and +-- (TINYINT, SMALLINT, INT, BIGINT), VARCHAR and VARCHAR trunc +-- +-- +CREATE TABLE part_change_numeric_group_string_group_multi_ints_string_group(insert_num int, c1 tinyint, c2 smallint, c3 int, c4 bigint, c5 tinyint, c6 
smallint, c7 int, c8 bigint, c9 tinyint, c10 smallint, c11 int, c12 bigint, c13 tinyint, c14 smallint, c15 int, c16 bigint, c17 tinyint, c18 smallint, c19 int, c20 bigint, @@ -474,7 +559,18 @@ PREHOOK: query: CREATE TABLE part_change_numeric_group_string_group_multi_ints_s PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@part_change_numeric_group_string_group_multi_ints_string_group -POSTHOOK: query: CREATE TABLE part_change_numeric_group_string_group_multi_ints_string_group(insert_num int, +POSTHOOK: query: ------------------------------------------------------------------------------------------ +-- SECTION: ALTER TABLE CHANGE COLUMNS for NUMERIC_GROUP -> STRING_GROUP +-- +-- +-- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for NUMERIC_GROUP -> STRING_GROUP: +-- (TINYINT, SMALLINT, INT, BIGINT), STRING and +-- (TINYINT, SMALLINT, INT, BIGINT), CHAR and CHAR trunc and +-- (TINYINT, SMALLINT, INT, BIGINT), VARCHAR and VARCHAR trunc +-- +-- +CREATE TABLE part_change_numeric_group_string_group_multi_ints_string_group(insert_num int, c1 tinyint, c2 smallint, c3 int, c4 bigint, c5 tinyint, c6 smallint, c7 int, c8 bigint, c9 tinyint, c10 smallint, c11 int, c12 bigint, c13 tinyint, c14 smallint, c15 int, c16 bigint, c17 tinyint, c18 smallint, c19 int, c20 bigint, @@ -537,7 +633,8 @@ insert_num part c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c 103 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL original 104 1 23 834 203332 888888857923222 23 834 203332 888888857923222 23 834 203332 888888857923222 23 834 203332 888888857923222 23 834 203332 888888857923222 original 105 1 -99 -28300 -999992 -222282153733 -99 -28300 -999992 -222282153733 -99 -28300 -999992 -222282153733 -99 -28300 -999992 -222282153733 -99 -28300 -999992 -222282153733 original -PREHOOK: query: alter table part_change_numeric_group_string_group_multi_ints_string_group replace columns (insert_num int, +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table part_change_numeric_group_string_group_multi_ints_string_group replace columns (insert_num int, c1 STRING, c2 STRING, c3 STRING, c4 STRING, c5 CHAR(50), c6 CHAR(50), c7 CHAR(50), c8 CHAR(50), c9 CHAR(5), c10 CHAR(5), c11 CHAR(5), c12 CHAR(5), c13 VARCHAR(50), c14 VARCHAR(50), c15 VARCHAR(50), c16 VARCHAR(50), c17 VARCHAR(5), c18 VARCHAR(5), c19 VARCHAR(5), c20 VARCHAR(5), @@ -545,7 +642,8 @@ PREHOOK: query: alter table part_change_numeric_group_string_group_multi_ints_st PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: default@part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: Output: default@part_change_numeric_group_string_group_multi_ints_string_group -POSTHOOK: query: alter table part_change_numeric_group_string_group_multi_ints_string_group replace columns (insert_num int, +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table part_change_numeric_group_string_group_multi_ints_string_group replace columns (insert_num int, c1 STRING, c2 STRING, c3 STRING, c4 STRING, c5 CHAR(50), c6 CHAR(50), c7 CHAR(50), c8 CHAR(50), c9 CHAR(5), c10 CHAR(5), c11 CHAR(5), c12 CHAR(5), c13 VARCHAR(50), c14 VARCHAR(50), c15 VARCHAR(50), c16 VARCHAR(50), c17 VARCHAR(5), c18 VARCHAR(5), c19 VARCHAR(5), c20 VARCHAR(5), @@ -607,7 +705,7 @@ Stage-0 File Output Operator [FS_4] Select Operator [SEL_3] (rows=6 width=4) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] - TableScan [TS_0] (rows=6 width=182) + TableScan [TS_0] (rows=6 width=153) default@part_change_numeric_group_string_group_multi_ints_string_group,part_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group @@ -635,7 +733,14 @@ POSTHOOK: query: drop table part_change_numeric_group_string_group_multi_ints_st POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@part_change_numeric_group_string_group_multi_ints_string_group POSTHOOK: Output: default@part_change_numeric_group_string_group_multi_ints_string_group -PREHOOK: query: CREATE TABLE part_change_numeric_group_string_group_floating_string_group(insert_num int, +PREHOOK: query: -- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for NUMERIC_GROUP -> STRING_GROUP: +-- (DECIMAL, FLOAT, DOUBLE), STRING and +-- (DECIMAL, FLOAT, DOUBLE), CHAR and CHAR trunc and +-- (DECIMAL, FLOAT, DOUBLE), VARCHAR and VARCHAR trunc +-- +-- +CREATE TABLE part_change_numeric_group_string_group_floating_string_group(insert_num int, c1 decimal(38,18), c2 float, c3 double, c4 decimal(38,18), c5 float, c6 double, c7 decimal(38,18), c8 float, c9 double, c10 decimal(38,18), c11 float, c12 double, c13 decimal(38,18), c14 float, c15 double, @@ -643,7 +748,14 @@ PREHOOK: query: CREATE TABLE part_change_numeric_group_string_group_floating_str PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@part_change_numeric_group_string_group_floating_string_group -POSTHOOK: query: CREATE TABLE part_change_numeric_group_string_group_floating_string_group(insert_num int, +POSTHOOK: query: -- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for NUMERIC_GROUP -> STRING_GROUP: +-- (DECIMAL, FLOAT, DOUBLE), STRING and +-- (DECIMAL, FLOAT, DOUBLE), CHAR and CHAR trunc and +-- (DECIMAL, FLOAT, DOUBLE), VARCHAR and VARCHAR trunc +-- +-- +CREATE TABLE part_change_numeric_group_string_group_floating_string_group(insert_num int, c1 decimal(38,18), c2 float, c3 double, c4 decimal(38,18), c5 float, c6 double, c7 decimal(38,18), c8 float, c9 double, c10 decimal(38,18), c11 float, c12 double, c13 decimal(38,18), c14 float, c15 double, @@ -701,7 +813,8 @@ insert_num part c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 b 103 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL original 104 1 66475.561431000000000000 -100.35978 30.774 66475.561431000000000000 -100.35978 30.774 66475.561431000000000000 -100.35978 30.774 66475.561431000000000000 -100.35978 30.774 66475.561431000000000000 -100.35978 30.774 original 105 1 
9250340.750000000000000000 NULL 46114.28 9250340.750000000000000000 NULL 46114.28 9250340.750000000000000000 NULL 46114.28 9250340.750000000000000000 NULL 46114.28 9250340.750000000000000000 NULL 46114.28 original -PREHOOK: query: alter table part_change_numeric_group_string_group_floating_string_group replace columns (insert_num int, +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table part_change_numeric_group_string_group_floating_string_group replace columns (insert_num int, c1 STRING, c2 STRING, c3 STRING, c4 CHAR(50), c5 CHAR(50), c6 CHAR(50), c7 CHAR(7), c8 CHAR(7), c9 CHAR(7), c10 VARCHAR(50), c11 VARCHAR(50), c12 VARCHAR(50), c13 VARCHAR(7), c14 VARCHAR(7), c15 VARCHAR(7), @@ -709,7 +822,8 @@ PREHOOK: query: alter table part_change_numeric_group_string_group_floating_stri PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: default@part_change_numeric_group_string_group_floating_string_group PREHOOK: Output: default@part_change_numeric_group_string_group_floating_string_group -POSTHOOK: query: alter table part_change_numeric_group_string_group_floating_string_group replace columns (insert_num int, +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table part_change_numeric_group_string_group_floating_string_group replace columns (insert_num int, c1 STRING, c2 STRING, c3 STRING, c4 CHAR(50), c5 CHAR(50), c6 CHAR(50), c7 CHAR(7), c8 CHAR(7), c9 CHAR(7), c10 VARCHAR(50), c11 VARCHAR(50), c12 VARCHAR(50), c13 VARCHAR(7), c14 VARCHAR(7), c15 VARCHAR(7), @@ -766,7 +880,7 @@ Stage-0 File Output Operator [FS_4] Select Operator [SEL_3] (rows=6 width=4) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - TableScan [TS_0] (rows=6 width=253) + TableScan [TS_0] (rows=6 width=231) default@part_change_numeric_group_string_group_floating_string_group,part_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group @@ -794,14 +908,30 @@ POSTHOOK: query: drop table part_change_numeric_group_string_group_floating_stri POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@part_change_numeric_group_string_group_floating_string_group POSTHOOK: Output: default@part_change_numeric_group_string_group_floating_string_group -PREHOOK: query: CREATE TABLE part_change_string_group_string_group_string(insert_num int, +PREHOOK: query: ------------------------------------------------------------------------------------------ +-- SECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> STRING_GROUP +-- +-- +-- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> STRING_GROUP: STRING, (CHAR, CHAR trunc, VARCHAR, VARCHAR trunc) and +-- CHAR, (VARCHAR, VARCHAR trunc, STRING) and VARCHAR, (CHAR, CHAR trunc, STRING) +-- +CREATE TABLE part_change_string_group_string_group_string(insert_num int, c1 string, c2 string, c3 string, c4 string, c5 CHAR(50), c6 CHAR(50), c7 CHAR(50), c8 VARCHAR(50), c9 VARCHAR(50), c10 VARCHAR(50), b STRING) PARTITIONED BY(part INT) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@part_change_string_group_string_group_string -POSTHOOK: query: CREATE TABLE part_change_string_group_string_group_string(insert_num int, +POSTHOOK: 
query: ------------------------------------------------------------------------------------------ +-- SECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> STRING_GROUP +-- +-- +-- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for STRING_GROUP -> STRING_GROUP: STRING, (CHAR, CHAR trunc, VARCHAR, VARCHAR trunc) and +-- CHAR, (VARCHAR, VARCHAR trunc, STRING) and VARCHAR, (CHAR, CHAR trunc, STRING) +-- +CREATE TABLE part_change_string_group_string_group_string(insert_num int, c1 string, c2 string, c3 string, c4 string, c5 CHAR(50), c6 CHAR(50), c7 CHAR(50), c8 VARCHAR(50), c9 VARCHAR(50), c10 VARCHAR(50), b STRING) PARTITIONED BY(part INT) @@ -853,14 +983,16 @@ insert_num part c1 c2 c3 c4 b 103 1 original 104 1 I cooked I cooked I cooked I cooked original 105 1 200 200 200 200 original -PREHOOK: query: alter table part_change_string_group_string_group_string replace columns (insert_num int, +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table part_change_string_group_string_group_string replace columns (insert_num int, c1 CHAR(50), c2 CHAR(9), c3 VARCHAR(50), c4 CHAR(9), c5 VARCHAR(50), c6 VARCHAR(9), c7 STRING, c8 CHAR(50), c9 CHAR(9), c10 STRING, b STRING) PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: default@part_change_string_group_string_group_string PREHOOK: Output: default@part_change_string_group_string_group_string -POSTHOOK: query: alter table part_change_string_group_string_group_string replace columns (insert_num int, +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table part_change_string_group_string_group_string replace columns (insert_num int, c1 CHAR(50), c2 CHAR(9), c3 VARCHAR(50), c4 CHAR(9), c5 VARCHAR(50), c6 VARCHAR(9), c7 STRING, c8 CHAR(50), c9 CHAR(9), c10 STRING, b STRING) @@ -911,7 +1043,7 @@ Stage-0 File Output Operator [FS_4] Select Operator [SEL_3] (rows=6 width=4) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - TableScan [TS_0] (rows=6 width=200) + TableScan [TS_0] (rows=6 width=70) default@part_change_string_group_string_group_string,part_change_string_group_string_group_string,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string @@ -939,7 +1071,18 @@ POSTHOOK: query: drop table part_change_string_group_string_group_string POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@part_change_string_group_string_group_string POSTHOOK: Output: default@part_change_string_group_string_group_string -PREHOOK: query: CREATE TABLE part_change_lower_to_higher_numeric_group_tinyint_to_bigint(insert_num int, +PREHOOK: query: ------------------------------------------------------------------------------------------ +-- SECTION: ALTER TABLE CHANGE COLUMNS for "lower" type to "higher" NUMERIC_GROUP +-- +-- +-- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for "lower" type to "higher" NUMERIC_GROUP: +-- TINYINT, (SMALLINT, INT, BIGINT, DECIMAL, FLOAT, DOUBLE) and +-- SMALLINT, (INT, BIGINT, DECIMAL, FLOAT, DOUBLE) and +-- INT, (BIGINT, DECIMAL, FLOAT, DOUBLE) and +-- BIGINT, (DECIMAL, FLOAT, DOUBLE) +-- +CREATE TABLE part_change_lower_to_higher_numeric_group_tinyint_to_bigint(insert_num int, c1 tinyint, c2 tinyint, c3 tinyint, c4 tinyint, c5 tinyint, c6 tinyint, c7 smallint, c8 smallint, c9 smallint, c10 smallint, c11 smallint, c12 int, c13 int, c14 int, c15 int, @@ -948,7 +1091,18 @@ PREHOOK: query: CREATE 
TABLE part_change_lower_to_higher_numeric_group_tinyint_t PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@part_change_lower_to_higher_numeric_group_tinyint_to_bigint -POSTHOOK: query: CREATE TABLE part_change_lower_to_higher_numeric_group_tinyint_to_bigint(insert_num int, +POSTHOOK: query: ------------------------------------------------------------------------------------------ +-- SECTION: ALTER TABLE CHANGE COLUMNS for "lower" type to "higher" NUMERIC_GROUP +-- +-- +-- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for "lower" type to "higher" NUMERIC_GROUP: +-- TINYINT, (SMALLINT, INT, BIGINT, DECIMAL, FLOAT, DOUBLE) and +-- SMALLINT, (INT, BIGINT, DECIMAL, FLOAT, DOUBLE) and +-- INT, (BIGINT, DECIMAL, FLOAT, DOUBLE) and +-- BIGINT, (DECIMAL, FLOAT, DOUBLE) +-- +CREATE TABLE part_change_lower_to_higher_numeric_group_tinyint_to_bigint(insert_num int, c1 tinyint, c2 tinyint, c3 tinyint, c4 tinyint, c5 tinyint, c6 tinyint, c7 smallint, c8 smallint, c9 smallint, c10 smallint, c11 smallint, c12 int, c13 int, c14 int, c15 int, @@ -1012,7 +1166,8 @@ insert_num part c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 b 103 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL original 104 1 23 23 23 23 23 23 834 834 834 834 834 203332 203332 203332 203332 888888857923222 888888857923222 888888857923222 original 105 1 -99 -99 -99 -99 -99 -99 -28300 -28300 -28300 -28300 -28300 -999992 -999992 -999992 -999992 -222282153733 -222282153733 -222282153733 original -PREHOOK: query: alter table part_change_lower_to_higher_numeric_group_tinyint_to_bigint replace columns (insert_num int, +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table part_change_lower_to_higher_numeric_group_tinyint_to_bigint replace columns (insert_num int, c1 SMALLINT, c2 INT, c3 BIGINT, c4 decimal(38,18), c5 FLOAT, c6 DOUBLE, c7 INT, c8 BIGINT, c9 decimal(38,18), c10 FLOAT, c11 DOUBLE, c12 BIGINT, c13 decimal(38,18), c14 FLOAT, c15 DOUBLE, @@ -1021,7 +1176,8 @@ PREHOOK: query: alter table part_change_lower_to_higher_numeric_group_tinyint_to PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: default@part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: Output: default@part_change_lower_to_higher_numeric_group_tinyint_to_bigint -POSTHOOK: query: alter table part_change_lower_to_higher_numeric_group_tinyint_to_bigint replace columns (insert_num int, +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table part_change_lower_to_higher_numeric_group_tinyint_to_bigint replace columns (insert_num int, c1 SMALLINT, c2 INT, c3 BIGINT, c4 decimal(38,18), c5 FLOAT, c6 DOUBLE, c7 INT, c8 BIGINT, c9 decimal(38,18), c10 FLOAT, c11 DOUBLE, c12 BIGINT, c13 decimal(38,18), c14 FLOAT, c15 DOUBLE, @@ -1112,14 +1268,24 @@ POSTHOOK: query: drop table part_change_lower_to_higher_numeric_group_tinyint_to POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@part_change_lower_to_higher_numeric_group_tinyint_to_bigint POSTHOOK: Output: default@part_change_lower_to_higher_numeric_group_tinyint_to_bigint -PREHOOK: query: CREATE TABLE part_change_lower_to_higher_numeric_group_decimal_to_float(insert_num int, +PREHOOK: query: -- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for "lower" type to "higher" NUMERIC_GROUP: +-- DECIMAL, (FLOAT, DOUBLE) and +-- FLOAT, (DOUBLE) +-- +CREATE TABLE part_change_lower_to_higher_numeric_group_decimal_to_float(insert_num int, c1 decimal(38,18), c2 decimal(38,18), c3 float, b STRING) PARTITIONED BY(part INT) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@part_change_lower_to_higher_numeric_group_decimal_to_float -POSTHOOK: query: CREATE TABLE part_change_lower_to_higher_numeric_group_decimal_to_float(insert_num int, +POSTHOOK: query: -- +-- SUBSECTION: ALTER TABLE CHANGE COLUMNS for "lower" type to "higher" NUMERIC_GROUP: +-- DECIMAL, (FLOAT, DOUBLE) and +-- FLOAT, (DOUBLE) +-- +CREATE TABLE part_change_lower_to_higher_numeric_group_decimal_to_float(insert_num int, c1 decimal(38,18), c2 decimal(38,18), c3 float, b STRING) PARTITIONED BY(part INT) @@ -1162,11 +1328,13 @@ insert_num part c1 c2 c3 b 103 1 NULL NULL NULL original 104 1 66475.561431000000000000 66475.561431000000000000 -100.35978 original 105 1 9250340.750000000000000000 9250340.750000000000000000 NULL original -PREHOOK: query: alter table part_change_lower_to_higher_numeric_group_decimal_to_float replace columns (insert_num int, c1 float, c2 double, c3 DOUBLE, b STRING) +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table part_change_lower_to_higher_numeric_group_decimal_to_float replace columns (insert_num int, c1 float, c2 double, c3 DOUBLE, b STRING) PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: default@part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: Output: default@part_change_lower_to_higher_numeric_group_decimal_to_float -POSTHOOK: query: alter table part_change_lower_to_higher_numeric_group_decimal_to_float replace columns (insert_num int, c1 float, c2 double, c3 DOUBLE, b STRING) +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table part_change_lower_to_higher_numeric_group_decimal_to_float replace columns (insert_num int, c1 float, c2 double, c3 DOUBLE, b STRING) POSTHOOK: type: ALTERTABLE_REPLACECOLS POSTHOOK: Input: default@part_change_lower_to_higher_numeric_group_decimal_to_float POSTHOOK: Output: default@part_change_lower_to_higher_numeric_group_decimal_to_float diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out index 06d2372..82bc8a4 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out @@ -70,9 +70,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=20) + Select Operator [SEL_3] (rows=5 width=20) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=20) + TableScan [TS_0] (rows=5 width=20) default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] PREHOOK: query: select insert_num,a,b from table_add_int_permute_select @@ -183,9 +183,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=21) + Select Operator [SEL_3] (rows=5 width=20) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=21) + TableScan [TS_0] (rows=5 width=20) default@table_add_int_string_permute_select,table_add_int_string_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] PREHOOK: query: select insert_num,a,b from table_add_int_string_permute_select @@ -358,9 +358,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=80) + Select Operator [SEL_3] (rows=5 width=90) Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_0] (rows=6 width=80) + TableScan [TS_0] (rows=5 width=90) default@table_change_string_group_double,table_change_string_group_double,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","b"] PREHOOK: query: select insert_num,c1,c2,c3,b from table_change_string_group_double @@ -602,9 +602,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=178) + Select Operator [SEL_3] (rows=5 width=151) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"] - TableScan [TS_0] (rows=6 width=178) + TableScan [TS_0] (rows=5 width=151) default@table_change_numeric_group_string_group_multi_ints_string_group,table_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group @@ -757,9 +757,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=249) + Select Operator [SEL_3] (rows=5 width=250) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - TableScan [TS_0] (rows=6 width=249) + TableScan [TS_0] (rows=5 width=250) 
default@table_change_numeric_group_string_group_floating_string_group,table_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out index 06d2372..82bc8a4 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out @@ -70,9 +70,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=20) + Select Operator [SEL_3] (rows=5 width=20) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=20) + TableScan [TS_0] (rows=5 width=20) default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] PREHOOK: query: select insert_num,a,b from table_add_int_permute_select @@ -183,9 +183,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=21) + Select Operator [SEL_3] (rows=5 width=20) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=21) + TableScan [TS_0] (rows=5 width=20) default@table_add_int_string_permute_select,table_add_int_string_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"] PREHOOK: query: select insert_num,a,b from table_add_int_string_permute_select @@ -358,9 +358,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=80) + Select Operator [SEL_3] (rows=5 width=90) Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_0] (rows=6 width=80) + TableScan [TS_0] (rows=5 width=90) default@table_change_string_group_double,table_change_string_group_double,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","b"] PREHOOK: query: select insert_num,c1,c2,c3,b from table_change_string_group_double @@ -602,9 +602,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=178) + Select Operator [SEL_3] (rows=5 width=151) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"] - TableScan [TS_0] (rows=6 width=178) + TableScan [TS_0] (rows=5 width=151) default@table_change_numeric_group_string_group_multi_ints_string_group,table_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from table_change_numeric_group_string_group_multi_ints_string_group @@ -757,9 +757,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_4] - Select Operator [SEL_3] (rows=6 width=249) + Select Operator [SEL_3] (rows=5 width=250) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - TableScan [TS_0] (rows=6 width=249) + TableScan 
[TS_0] (rows=5 width=250) default@table_change_numeric_group_string_group_floating_string_group,table_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:NONE,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] PREHOOK: query: select insert_num,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from table_change_numeric_group_string_group_floating_string_group diff --git ql/src/test/results/clientpositive/llap/sqlmerge.q.out ql/src/test/results/clientpositive/llap/sqlmerge.q.out index 486e812..068b75f 100644 --- ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -25,27 +25,24 @@ WHEN MATCHED THEN UPDATE SET b = 7 WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-5 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-0 - Stage-2 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-9 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -61,7 +58,7 @@ STAGE PLANS: value expressions: ROW__ID (type: struct) Execution mode: llap LLAP IO: may be used (ACID table) - Map 7 + Map 6 Map Operator Tree: TableScan alias: s @@ -111,25 +108,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: int) Filter Operator - predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col4 (type: struct) - outputColumnNames: _col4 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - keys: _col4 (type: struct) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - sort order: + - Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: bigint) - Filter Operator predicate: _col0 is null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -174,30 +152,6 @@ STAGE PLANS: Reducer 5 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (_col1 > 1) (type: 
boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: cardinality_violation(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - Reducer 6 - Execution mode: llap - Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: int) outputColumnNames: _col0, _col1 @@ -211,7 +165,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl - Stage: Stage-5 + Stage: Stage-4 Dependency Collection Stage: Stage-0 @@ -224,7 +178,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl - Stage: Stage-6 + Stage: Stage-5 Stats-Aggr Operator Stage: Stage-2 @@ -237,20 +191,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl - Stage: Stage-7 - Stats-Aggr Operator - - Stage: Stage-3 - Move Operator - tables: - replace: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_tmp_table - - Stage: Stage-8 + Stage: Stage-6 Stats-Aggr Operator Stage: Stage-1 @@ -263,6 +204,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl - Stage: Stage-9 + Stage: Stage-7 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/llap/subquery_in.q.out ql/src/test/results/clientpositive/llap/subquery_in.q.out index 63432a0..eae6f1d 100644 --- ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -26,19 +26,16 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -71,10 +68,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -349,19 +346,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_size is not null (type: boolean) + Select Operator + expressions: p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col1) (type: double) Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: UDFToDouble(_col1) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col1) (type: double) - Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: int) + value expressions: _col0 (type: string), _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 @@ -447,19 +441,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -502,21 +493,23 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### almond antique medium spring khaki 6 almond antique salmon chartreuse burlywood 6 -PREHOOK: query: explain -select p_mfgr, p_name, p_size -from part b where b.p_size in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) +PREHOOK: query: explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) PREHOOK: type: QUERY -POSTHOOK: query: explain -select p_mfgr, p_name, p_size -from part b where b.p_size in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) +POSTHOOK: query: explain +select * +from src b +where b.key in + (select distinct a.key + from src a + 
where b.value = a.value and a.key > '9' + ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -527,59 +520,63 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan - alias: part - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + alias: a + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 8 + Map 6 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: p_mfgr (type: string) + keys: value (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 214 Data size: 19474 Basic stats: 
COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -589,138 +586,69 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col2 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col2, _col5 - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col5 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE 
Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col1 (type: int) - outputColumnNames: _col2, _col1 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col1) - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col2 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: string), _col1 (type: int) + keys: _col0 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -728,202 +656,15 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select p_mfgr, p_name, p_size -from part b where b.p_size in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) +PREHOOK: query: select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size -from part b where b.p_size in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -Manufacturer#1 almond antique burnished rose metallic 2 -Manufacturer#1 almond antique burnished rose metallic 2 -Manufacturer#2 almond aquamarine midnight light salmon 2 -Manufacturer#3 almond antique misty red olive 1 -Manufacturer#4 almond aquamarine yellow dodger mint 7 -Manufacturer#5 almond antique sky peru orange 2 -PREHOOK: query: explain -select * -from src b -where b.key in - (select distinct a.key - from src a - where b.value = a.value and a.key > '9' - ) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select * -from src b -where b.key in - (select distinct a.key - from src a - where b.value = a.value and a.key > '9' - ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 166 Data 
size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: value (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * -from src b -where b.key in - 
(select distinct a.key - from src a - where b.value = a.value and a.key > '9' - ) -PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: select * from src b @@ -1019,7 +760,7 @@ STAGE PLANS: alias: li Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) @@ -1039,7 +780,7 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int) @@ -1657,19 +1398,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_size is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (_col5 - 1) (type: int) - sort order: + - Map-reduce partition columns: (_col5 - 1) (type: int) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1726,19 +1464,16 @@ STAGE PLANS: expressions: _col1 (type: int) outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: 
_col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -1800,19 +1535,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (p_partkey is not null and p_size is not null) (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: (_col0 * _col5) (type: int) + sort order: + + Map-reduce partition columns: (_col0 * _col5) (type: int) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (_col0 * _col5) (type: int) - sort order: + - Map-reduce partition columns: (_col0 * _col5) (type: int) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1869,19 +1601,16 @@ STAGE PLANS: expressions: _col1 (type: int) outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reducer 5 Execution mode: llap Reduce 
Operator Tree: @@ -2109,19 +1838,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_retailprice is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: floor(_col7) (type: bigint) + sort order: + + Map-reduce partition columns: floor(_col7) (type: bigint) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: floor(_col7) (type: bigint) - sort order: + - Map-reduce partition columns: floor(_col7) (type: bigint) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -2175,26 +1901,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: double) - outputColumnNames: _col1 + expressions: floor(_col1) (type: bigint) + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: floor(_col1) is not null (type: boolean) - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: floor(_col1) (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -2970,19 +2689,16 @@ STAGE PLANS: TableScan alias: src 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2990,19 +2706,16 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: value (type: string) - mode: hash - outputColumnNames: _col0 + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -3061,15 +2774,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Map-reduce partition columns: _col1 (type: bigint) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -3114,19 +2824,16 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: bigint) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 9 Execution mode: llap Reduce Operator Tree: @@ -3506,19 +3213,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_size is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (_col5 - 1) (type: int) - sort order: + - Map-reduce partition columns: (_col5 - 1) (type: int) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -3587,19 +3291,16 @@ STAGE PLANS: expressions: _col1 (type: int) outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -3662,19 +3363,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_size is not null (type: boolean) + Select Operator + expressions: 
p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (_col5 - 1) (type: int) - sort order: + - Map-reduce partition columns: (_col5 - 1) (type: int) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -3747,19 +3445,16 @@ STAGE PLANS: expressions: _col1 (type: int) outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -3816,19 +3511,16 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition 
columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -3836,23 +3528,20 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_name is not null (type: boolean) + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 @@ -3860,23 +3549,20 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_brand is not null (type: boolean) + Select Operator + expressions: p_brand (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_brand (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -5070,9 +4756,39 @@ POSTHOOK: Input: default@part 85768 86428 90681 -PREHOOK: query: explain select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type) +PREHOOK: query: create table t(i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into t values(1) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values(1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.i EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values(0) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values(0) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table tempty(i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tempty +POSTHOOK: query: create table tempty(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tempty +PREHOOK: query: explain select * from t where i IN (select count(*) from tempty) PREHOOK: type: QUERY -POSTHOOK: query: explain select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type) +POSTHOOK: query: explain select * from t where i IN (select count(*) from tempty) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -5083,1697 +4799,29 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE) - Reducer 12 <- Reducer 11 (SIMPLE_EDGE) - Reducer 14 <- Map 13 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: part - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + alias: t + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col4 (type: string) - sort order: + - Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 13 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string) - mode: hash + expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: UDFToLong(_col0) (type: bigint) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data 
size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: UDFToLong(_col0) (type: bigint) + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_type (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 9 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_type (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Reducer 10 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 11 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 12 - 
Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 14 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col4 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col10, true) > 0) (type: boolean) - Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col4 (type: string), UDFToLong(_col5) (type: bigint) - sort order: ++ - Map-reduce partition columns: _col4 (type: string), UDFToLong(_col5) (type: bigint) - Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: string), UDFToLong(_col5) (type: bigint) - 1 _col1 (type: string), _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: 
Num rows: 12 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -PREHOOK: query: explain select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: 
string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), UDFToDouble(_col5) (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), UDFToDouble(_col5) (type: double) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), UDFToDouble(_col5) (type: double) - 1 _col1 (type: int), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: int), _col1 (type: int) - outputColumnNames: _col2, _col1 - Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: avg(_col1) - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: 
_col0 (type: int) - Statistics: Num rows: 7 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: double) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: double), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int), _col0 (type: double) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: double) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ -110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously -112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even -144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about -146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 
2 SM CASE 2031.98 s cajole caref -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve -191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle -192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir -195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -PREHOOK: query: explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col5 (type: int) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col5 (type: int) - 1 _col1 (type: int), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 338 Data size: 4056 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col0 > _col2) (type: boolean) - Statistics: Num rows: 112 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: int), _col1 (type: int) - outputColumnNames: _col2, _col1 - Statistics: Num rows: 112 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col1) - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 5 - Execution mode: llap - Reduce 
Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -PREHOOK: query: select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref -PREHOOK: query: explain select * from part where p_size NOT IN (select count(*) from part pp where pp.p_type = part.p_type) -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from part where p_size NOT IN (select count(*) from part pp where pp.p_type = part.p_type) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) - Reducer 12 <- Reducer 11 (SIMPLE_EDGE) - Reducer 14 <- Map 13 (SIMPLE_EDGE) - Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) - Reducer 17 <- Reducer 16 (SIMPLE_EDGE) - Reducer 18 <- Reducer 17 (SIMPLE_EDGE) - 
Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 21 <- Map 20 (SIMPLE_EDGE) - Reducer 23 <- Map 22 (SIMPLE_EDGE) - Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 19 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col4 (type: string) - sort order: + - Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 10 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_type (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 13 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 15 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_type (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 20 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 
1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 22 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_size (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_type (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 8 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Reducer 11 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 12 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(), count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE 
- value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 14 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 16 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 17 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: bigint), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 18 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint), _col1 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: boolean) - Reducer 19 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 UDFToLong(_col0) (type: bigint) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col3 (type: int), _col1 (type: string) - Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: boolean) - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition 
map: - Left Outer Join0 to 1 - keys: - 0 _col4 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col10, true) > 0) (type: boolean) - Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col4 (type: string) - sort order: + - Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 21 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 23 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: UDFToLong(_col0) (type: bigint) - sort order: + - Map-reduce partition columns: UDFToLong(_col0) (type: bigint) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col4 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13 - Statistics: Num rows: 8 Data size: 5080 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col5 (type: int), _col4 (type: string) - sort order: ++ - Map-reduce partition columns: _col5 (type: int), _col4 (type: string) - Statistics: Num rows: 8 Data size: 5080 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col5 (type: int), _col4 (type: string) - 1 _col3 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16 - Statistics: Num rows: 8 Data size: 5112 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: CASE WHEN ((_col12 = 0)) THEN 
(true) WHEN (_col12 is null) THEN (true) WHEN (_col16 is not null) THEN (false) WHEN (_col5 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (false) ELSE (true) END (type: boolean) - Statistics: Num rows: 4 Data size: 2556 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 12 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from part where p_size NOT IN (select count(*) from part pp where pp.p_type = part.p_type) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select * from part where p_size NOT IN (select count(*) from part pp where pp.p_type = part.p_type) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ -110592 almond antique salmon chartreuse burlywood 
Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
-112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
-132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
-144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
-146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
-15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
-155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
-17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
-17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
-191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
-192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
-195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
-33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
-42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
-45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
-48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
-49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
-65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
-78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
-85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
-86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
-90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
-PREHOOK: query: explain select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey)
-PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey)
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE)
- Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
- Reducer 14 <- Map 13 (SIMPLE_EDGE)
- Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE)
- Reducer 17 <- Reducer 16 (SIMPLE_EDGE)
- Reducer 18 <- Reducer 17 (SIMPLE_EDGE)
- Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
-
Reducer 21 <- Map 20 (SIMPLE_EDGE) - Reducer 23 <- Map 22 (SIMPLE_EDGE) - Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 19 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 10 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 13 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 15 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 20 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE 
Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 22 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_size (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 8 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Reducer 11 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: int), _col1 (type: int) - outputColumnNames: _col2, _col1 - Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: avg(_col1) - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) - Reducer 12 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(), count(_col1) - keys: _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce 
partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 14 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 16 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: int), _col1 (type: int) - outputColumnNames: _col2, _col1 - Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: avg(_col1) - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) - Reducer 17 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: double), _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: int) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 18 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: double), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: double), _col1 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: boolean) - Reducer 19 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: 
_col1 (type: int), _col3 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: boolean) - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col10, true) > 0) (type: boolean) - Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 21 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 23 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13 - Statistics: Num rows: 8 Data size: 5080 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col5 (type: int) - Statistics: Num rows: 8 Data size: 5080 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int), _col5 (type: int) - 1 _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16 - Statistics: Num rows: 8 Data size: 5112 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: CASE WHEN ((_col12 = 0)) THEN (true) WHEN (_col12 is null) THEN (true) WHEN (_col16 is not null) THEN (false) WHEN (_col5 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (false) ELSE (true) END (type: boolean) - Statistics: Num rows: 4 Data size: 2556 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2 - Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked 
pattern was here ####
-PREHOOK: query: create table t(i int)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@t
-POSTHOOK: query: create table t(i int)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@t
-PREHOOK: query: insert into t values(1)
-PREHOOK: type: QUERY
-PREHOOK: Output: default@t
-POSTHOOK: query: insert into t values(1)
-POSTHOOK: type: QUERY
-POSTHOOK: Output: default@t
-POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: insert into t values(0)
-PREHOOK: type: QUERY
-PREHOOK: Output: default@t
-POSTHOOK: query: insert into t values(0)
-POSTHOOK: type: QUERY
-POSTHOOK: Output: default@t
-POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: create table tempty(i int)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@tempty
-POSTHOOK: query: create table tempty(i int)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@tempty
-PREHOOK: query: explain select * from t where i IN (select count(*) from tempty)
-PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t where i IN (select count(*) from tempty)
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
- Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: t
- Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: i is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: i (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: UDFToLong(_col0) (type: bigint)
- sort order: +
- Map-reduce partition columns: UDFToLong(_col0) (type: bigint)
- Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
- Execution mode: llap
- LLAP IO: no inputs
- Map 3
+ Map 3
 Map Operator Tree:
 TableScan
 alias: tempty
@@ -6817,19 +4865,16 @@ STAGE PLANS:
 mode: mergepartial
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: bigint)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- sort order: +
- Map-reduce partition columns: _col0 (type: bigint)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 5
 Execution mode: llap
Reduce Operator Tree: @@ -6930,229 +4975,3 @@ POSTHOOK: query: drop table tempty POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tempty POSTHOOK: Output: default@tempty -PREHOOK: query: create table t(i int, j int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t -POSTHOOK: query: create table t(i int, j int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t -PREHOOK: query: insert into t values(0,1), (0,2) -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values(0,1), (0,2) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: t.j EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: create table tt(i int, j int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tt -POSTHOOK: query: create table tt(i int, j int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tt -PREHOOK: query: insert into tt values(0,3) -PREHOOK: type: QUERY -PREHOOK: Output: default@tt -POSTHOOK: query: insert into tt values(0,3) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@tt -POSTHOOK: Lineage: tt.i EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: tt.j EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain select * from t where i IN (select sum(i) from tt where tt.j = t.j) -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t where i IN (select sum(i) from tt where tt.j = t.j) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToLong(_col0) (type: bigint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: UDFToLong(_col0) (type: bigint), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: tt - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: 
NONE - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: j (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToLong(_col0) (type: bigint), _col1 (type: int) - 1 _col0 (type: bigint), _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col0 (type: int) - outputColumnNames: _col2, _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint), _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - 
keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from t where i IN (select sum(i) from tt where tt.j = t.j) -PREHOOK: type: QUERY -PREHOOK: Input: default@t -PREHOOK: Input: default@tt -#### A masked pattern was here #### -POSTHOOK: query: select * from t where i IN (select sum(i) from tt where tt.j = t.j) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t -POSTHOOK: Input: default@tt -#### A masked pattern was here #### -PREHOOK: query: drop table t -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@t -PREHOOK: Output: default@t -POSTHOOK: query: drop table t -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@t -POSTHOOK: Output: default@t -PREHOOK: query: drop table tt -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@tt -PREHOOK: Output: default@tt -POSTHOOK: query: drop table tt -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@tt -POSTHOOK: Output: default@tt diff --git ql/src/test/results/clientpositive/llap/subquery_multi.q.out ql/src/test/results/clientpositive/llap/subquery_multi.q.out index bbdce1d..90cdf3e 100644 --- ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -99,19 +99,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_size is not null and p_brand is not null) (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: int) - sort order: + - Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -119,19 +116,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 814 Data size: 3256 Basic stats: 
COMPLETE Column stats: NONE - Filter Operator - predicate: p_size is not null (type: boolean) + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_size (type: int) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 6 @@ -139,19 +133,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_brand is not null (type: boolean) + Group By Operator + keys: p_brand (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_brand (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -257,7 +248,7 @@ POSTHOOK: Input: default@part_null 78487 NULL Manufacturer#6 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. 
bra 15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null) @@ -284,19 +275,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -304,19 +292,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_name (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 7 @@ -344,36 +329,36 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 32 Data size: 3256 
Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_name (type: string) - mode: hash + Select Operator + expressions: p_name (type: string) outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 10 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -463,7 +448,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -472,7 +457,7 @@ POSTHOOK: query: select * from part_null where p_name IN (select p_name from par POSTHOOK: type: QUERY POSTHOOK: Input: default@part_null #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) @@ -499,19 +484,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice 
(type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -519,19 +501,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_name (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 7 @@ -559,36 +538,36 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_type (type: string) - mode: hash + Select Operator + expressions: p_type (type: string) outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 10 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) mode: mergepartial - 
outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -678,7 +657,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -713,7 +692,7 @@ POSTHOOK: Input: default@part_null 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra 15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null) @@ -740,19 +719,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_brand is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce 
partition columns: _col3 (type: string) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -760,19 +736,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_brand is not null (type: boolean) + Group By Operator + keys: p_brand (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_brand (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 7 @@ -800,18 +773,19 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: p_name (type: string) + keys: _col0 (type: string), true (type: boolean) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs @@ -819,20 +793,16 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap 
Reduce Operator Tree: @@ -922,7 +892,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -931,7 +901,7 @@ POSTHOOK: query: select * from part_null where p_brand IN (select p_brand from p POSTHOOK: type: QUERY POSTHOOK: Input: default@part_null #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null) @@ -958,17 +928,14 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_brand is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + sort order: Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -996,16 +963,20 @@ STAGE PLANS: TableScan alias: tempty Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: c (type: char(2)) - mode: hash + Select Operator + expressions: c (type: char(2)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(2)) - sort order: + - Map-reduce partition columns: _col0 (type: char(2)) + Group By Operator + keys: _col0 (type: char(2)), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(2)), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: char(2)), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 9 @@ -1013,19 +984,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_brand is not null (type: boolean) + Group By Operator + keys: p_brand (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_brand (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 10 @@ -1116,20 +1084,16 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: char(2)) + keys: KEY._col0 (type: char(2)), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: char(2)), true (type: boolean) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: UDFToString(_col0) (type: string) + sort order: + + Map-reduce partition columns: UDFToString(_col0) (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: UDFToString(_col0) (type: string) - sort order: + - Map-reduce partition columns: UDFToString(_col0) (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -1137,7 +1101,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -1175,7 +1139,7 @@ POSTHOOK: Input: default@tempty 78487 NULL Manufacturer#6 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu 155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. 
bra -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) @@ -1200,19 +1164,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -1220,19 +1181,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_name (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 6 @@ -1320,7 +1278,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 
3' is a cross product PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -1357,7 +1315,7 @@ POSTHOOK: Input: default@tnull 17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty) @@ -1382,19 +1340,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_size is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: int) - sort order: + - Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -1402,19 +1357,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_size is not null (type: boolean) + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_size (type: int) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 814 
Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 6 @@ -1502,7 +1454,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -1513,7 +1465,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part_null POSTHOOK: Input: default@tempty #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND NOT EXISTS (select c from tempty) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND NOT EXISTS (select c from tempty) @@ -1538,19 +1490,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -1558,19 +1507,16 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Group By Operator + keys: p_name (type: string) + mode: hash + 
outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_name (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 6 @@ -1664,7 +1610,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND NOT EXISTS (select c from tempty) PREHOOK: type: QUERY PREHOOK: Input: default@part_null diff --git ql/src/test/results/clientpositive/llap/subquery_notin.q.out ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 48fe336..01604da 100644 --- ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from src @@ -72,16 +72,20 @@ STAGE PLANS: Filter Operator predicate: (key > '2') (type: boolean) Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string) - mode: hash + Select Operator + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -142,20 +146,16 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 
(type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 69 Data size: 6279 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -163,7 +163,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from src where src.key not in ( select key from src s1 where s1.key > '2') @@ -822,20 +822,16 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: double) + keys: KEY._col0 (type: double), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: double), true (type: boolean) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -912,648 +908,34 @@ STAGE PLANS: Select Operator expressions: _col5 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: avg(_col0) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(), count(_col0) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col2, _col5 - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, 
_col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col5 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: avg(_col0) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[51][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: select p_name, p_size -from -part where part.p_size not in - (select avg(p_size) - from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 - ) -order by p_name, p_size -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_name, p_size -from -part where part.p_size not in - (select avg(p_size) - from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 - ) -order by p_name, p_size -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -almond antique blue firebrick mint 31 -almond antique burnished rose metallic 2 -almond antique burnished rose metallic 2 -almond antique chartreuse khaki white 17 -almond antique chartreuse lavender yellow 34 -almond antique forest lavender goldenrod 14 -almond antique gainsboro frosted violet 10 -almond antique metallic orange dim 19 -almond antique misty red olive 1 -almond antique olive coral navajo 45 -almond antique sky peru orange 2 -almond antique violet chocolate turquoise 14 -almond antique violet mint lemon 39 -almond antique violet turquoise frosted 40 -almond aquamarine burnished black steel 28 -almond aquamarine dodger light gainsboro 46 -almond aquamarine floral ivory bisque 27 -almond aquamarine midnight light salmon 2 -almond aquamarine pink moccasin thistle 42 -almond aquamarine rose maroon antique 25 -almond aquamarine sandy cyan gainsboro 18 -almond aquamarine yellow dodger mint 7 -almond azure aquamarine 
papaya violet 12 -almond azure blanched chiffon midnight 23 -PREHOOK: query: explain -select p_mfgr, p_name, p_size -from part b where b.p_size not in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select p_mfgr, p_name, p_size -from part b where b.p_size not in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 10 <- Map 9 (SIMPLE_EDGE) - Reducer 12 <- Map 11 (SIMPLE_EDGE) - Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) - Reducer 14 <- Reducer 13 (SIMPLE_EDGE) - Reducer 16 <- Map 15 (SIMPLE_EDGE) - Reducer 18 <- Map 17 (SIMPLE_EDGE) - Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 20 <- Reducer 19 (SIMPLE_EDGE) - Reducer 21 <- Reducer 20 (SIMPLE_EDGE) - Reducer 22 <- Reducer 21 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) - Reducer 24 <- Map 23 (SIMPLE_EDGE) - Reducer 26 <- Map 25 (SIMPLE_EDGE) - Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 22 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 10 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col2 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 11 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: llap - LLAP IO: no inputs - Map 15 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 17 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2652 Basic 
stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: llap - LLAP IO: no inputs - Map 23 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 25 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_size (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: llap - LLAP IO: no inputs - Map 9 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Reducer 10 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 12 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col2, _col5 - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST 
- partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col5 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 13 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col1 (type: int) - outputColumnNames: _col2, _col1 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col1) - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 14 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(), count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 16 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 18 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col2, _col5 - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: 
_col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col5 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 19 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col1 (type: int) - outputColumnNames: _col2, _col1 - Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col1) - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col4, true) > 0) (type: boolean) - Statistics: Num rows: 8 Data size: 1848 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 1848 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 8 Data size: 1848 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col2 (type: int) - Reducer 20 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: int), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Reduce 
Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 21 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: boolean) - Reducer 22 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col3 (type: int) - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: boolean) - Reducer 24 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 26 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col6, _col7 - Statistics: Num rows: 8 Data size: 1912 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 8 Data size: 1912 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col6 (type: bigint), _col7 (type: bigint) - Reducer 4 + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: avg(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 
1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Reducer 6 Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string), _col2 (type: int) - 1 _col1 (type: string), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col10 - Statistics: Num rows: 8 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: CASE WHEN ((_col6 = 0)) THEN (true) WHEN (_col6 is null) THEN (true) WHEN (_col10 is not null) THEN (false) WHEN (_col2 is null) THEN (null) WHEN ((_col7 < _col6)) THEN (false) ELSE (true) END (type: boolean) - Statistics: Num rows: 4 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Group By Operator + aggregations: avg(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col0) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 8 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1585,55 +967,36 @@ STAGE PLANS: predicate: (rank_window_0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string) + expressions: _col5 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2 - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: avg(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + sort order: + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: avg(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: complete + keys: _col0 (type: double), true (type: boolean) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + key expressions: _col0 (type: double), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1641,45 +1004,54 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select p_mfgr, p_name, p_size -from part b where b.p_size not in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr +Warning: Shuffle Join MERGEJOIN[51][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 ) +order by p_name, p_size PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size -from part b where b.p_size not in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr +POSTHOOK: query: select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 ) +order by p_name, p_size POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -Manufacturer#1 almond antique salmon chartreuse burlywood 6 -Manufacturer#1 almond aquamarine burnished black steel 28 -Manufacturer#1 almond antique chartreuse lavender yellow 34 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 -Manufacturer#2 almond antique violet chocolate turquoise 14 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 -Manufacturer#2 almond aquamarine rose maroon antique 25 -Manufacturer#2 almond antique violet turquoise frosted 40 -Manufacturer#3 almond antique forest lavender goldenrod 14 -Manufacturer#3 almond antique chartreuse khaki white 17 -Manufacturer#3 almond antique metallic orange dim 19 -Manufacturer#3 almond antique olive coral navajo 45 -Manufacturer#4 almond antique gainsboro frosted violet 10 -Manufacturer#4 almond azure aquamarine papaya violet 12 -Manufacturer#4 almond aquamarine floral ivory bisque 27 
-Manufacturer#4 almond antique violet mint lemon 39 -Manufacturer#5 almond antique medium spring khaki 6 -Manufacturer#5 almond azure blanched chiffon midnight 23 -Manufacturer#5 almond antique blue firebrick mint 31 -Manufacturer#5 almond aquamarine dodger light gainsboro 46 -Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +almond antique blue firebrick mint 31 +almond antique burnished rose metallic 2 +almond antique burnished rose metallic 2 +almond antique chartreuse khaki white 17 +almond antique chartreuse lavender yellow 34 +almond antique forest lavender goldenrod 14 +almond antique gainsboro frosted violet 10 +almond antique metallic orange dim 19 +almond antique misty red olive 1 +almond antique olive coral navajo 45 +almond antique sky peru orange 2 +almond antique violet chocolate turquoise 14 +almond antique violet mint lemon 39 +almond antique violet turquoise frosted 40 +almond aquamarine burnished black steel 28 +almond aquamarine dodger light gainsboro 46 +almond aquamarine floral ivory bisque 27 +almond aquamarine midnight light salmon 2 +almond aquamarine pink moccasin thistle 42 +almond aquamarine rose maroon antique 25 +almond aquamarine sandy cyan gainsboro 18 +almond aquamarine yellow dodger mint 7 +almond azure aquamarine papaya violet 12 +almond azure blanched chiffon midnight 23 +Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select li.l_partkey, count(*) from lineitem li where li.l_linenumber = 1 and @@ -1712,7 +1084,7 @@ POSTHOOK: Input: default@lineitem 139636 1 175839 1 182052 1 -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from src where not src.key in ( select key from src s1 where s1.key > '2') @@ -1951,22 +1323,22 @@ STAGE PLANS: insideView TRUE Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (null) ELSE ((key < '11')) END) (type: boolean) - Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < '11') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 15272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 31208 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: string) + keys: _col0 (type: string), true (type: boolean) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 35 Data size: 6440 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 35 Data size: 6440 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE 
Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2027,20 +1399,16 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 35 Data size: 6440 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 69 Data size: 12972 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -2586,34 +1954,34 @@ STAGE PLANS: expressions: _col1 (type: int) outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: int) - mode: hash + Select Operator + expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -2818,34 +2186,34 @@ STAGE PLANS: expressions: _col1 (type: 
int) outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: int) - mode: hash + Select Operator + expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -3397,35 +2765,31 @@ STAGE PLANS: Select Operator expressions: floor(_col1) (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: bigint) + keys: _col0 (type: bigint), true (type: boolean) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: bigint), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: boolean) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: bigint) + keys: KEY._col0 (type: bigint), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - 
expressions: _col0 (type: bigint), true (type: boolean) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -4794,7 +4158,7 @@ almond azure aquamarine papaya violet almond antique medium spring khaki almond aquamarine sandy cyan gainsboro almond antique olive coral navajo -Warning: Shuffle Join MERGEJOIN[56][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select key, count(*) from src where value NOT IN (select key from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY POSTHOOK: query: explain select key, count(*) from src where value NOT IN (select key from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) @@ -4882,16 +4246,20 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string) - mode: hash + Select Operator + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 205 Data size: 18655 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 205 Data size: 18655 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 11 @@ -4907,19 +4275,16 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: bigint) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - 
sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 12 Execution mode: llap Reduce Operator Tree: @@ -4989,15 +4354,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Map-reduce partition columns: _col1 (type: bigint) - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -5032,20 +4394,16 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 205 Data size: 18655 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 205 Data size: 18655 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 205 Data size: 18655 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -5053,7 +4411,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[56][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select key, count(*) from src where value NOT IN (select key from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -5645,20 +5003,16 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: 
Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -5760,16 +5114,20 @@ STAGE PLANS: expressions: _col1 (type: int) outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: int) - mode: hash + Select Operator + expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -5890,20 +5248,16 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -6009,16 +5363,20 @@ STAGE PLANS: expressions: _col1 (type: int) outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: int) - mode: hash + Select Operator + expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), 
_col1 (type: boolean) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -6088,15 +5446,15 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: string) + keys: _col0 (type: string), true (type: boolean) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 @@ -6149,35 +5507,31 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: string) + keys: _col0 (type: string), true (type: boolean) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 10 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -7124,7 +6478,7 @@ POSTHOOK: query: INSERT INTO t2 VALUES (null), (2), (100) POSTHOOK: type: QUERY POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.c1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -Warning: Shuffle Join 
MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2) PREHOOK: type: QUERY POSTHOOK: query: explain SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2) @@ -7184,16 +6538,20 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: c1 (type: int) - mode: hash + Select Operator + expressions: c1 (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -7254,20 +6612,16 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -7275,7 +6629,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -8184,16 +7538,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: boolean) + value expressions: _col1 (type: int) Reducer 12 Execution mode: llap 
Reduce Operator Tree: @@ -8203,14 +7553,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int), _col1 (type: int) + key expressions: _col2 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col3 (type: int), _col1 (type: int) + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: boolean) + value expressions: _col0 (type: int) Reducer 14 Execution mode: llap Reduce Operator Tree: @@ -8262,11 +7612,11 @@ STAGE PLANS: Left Outer Join0 to 1 keys: 0 _col1 (type: int), _col1 (type: int) - 1 _col3 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col7 + 1 _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col5 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int) @@ -8540,16 +7890,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: boolean) + value expressions: _col1 (type: int) Reducer 12 Execution mode: llap Reduce Operator Tree: @@ -8559,14 +7905,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int), _col1 (type: int) + key expressions: _col2 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col3 (type: int), _col1 (type: int) + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: boolean) + value expressions: _col0 (type: int) Reducer 14 Execution mode: llap Reduce Operator Tree: @@ -8618,11 +7964,11 @@ STAGE PLANS: Left Outer Join0 to 1 keys: 0 _col1 (type: int), _col1 (type: int) - 1 _col3 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, 
_col7 + 1 _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col5 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) @@ -8864,16 +8210,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: boolean) + value expressions: _col1 (type: int) Reducer 12 Execution mode: llap Reduce Operator Tree: @@ -8883,14 +8225,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int), _col1 (type: int) + key expressions: _col2 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col3 (type: int), _col1 (type: int) + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: boolean) + value expressions: _col0 (type: int) Reducer 14 Execution mode: llap Reduce Operator Tree: @@ -8942,11 +8284,11 @@ STAGE PLANS: Left Outer Join0 to 1 keys: 0 _col0 (type: int), _col1 (type: int) - 1 _col3 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col3, _col4, _col7 + 1 _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col3, _col4, _col5 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) + predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col5 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) @@ -9030,7 +8372,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t #### A masked pattern was here #### 7 -Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle 
Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select t.i from t where t.j NOT IN (select t1.i from t t1 ) PREHOOK: type: QUERY POSTHOOK: query: explain select t.i from t where t.j NOT IN (select t1.i from t t1 ) @@ -9090,16 +8432,20 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: i (type: int) - mode: hash + Select Operator + expressions: i (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -9128,10 +8474,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 3 Data size: 67 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col2 = 0) or (_col5 is null and _col1 is not null and (_col3 >= _col2))) (type: boolean) + predicate: ((_col2 = 0) or (_col4 is null and _col1 is not null and (_col3 >= _col2))) (type: boolean) Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) @@ -9160,20 +8506,19 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -9181,7 +8526,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select t.i from t where t.j NOT IN (select t1.i from t t1 ) PREHOOK: type: QUERY PREHOOK: Input: default@t @@ -9192,7 +8537,7 @@ POSTHOOK: Input: default@t #### A masked pattern was here #### 1 4 -Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select t.i from t where t.i NOT IN (select t1.i from t t1 ) PREHOOK: type: QUERY POSTHOOK: query: explain 
select t.i from t where t.i NOT IN (select t1.i from t t1 ) @@ -9252,16 +8597,20 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: i (type: int) - mode: hash + Select Operator + expressions: i (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) + Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -9290,10 +8639,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 67 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col1 = 0) or (_col4 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean) + predicate: ((_col1 = 0) or (_col3 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean) Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) @@ -9322,20 +8671,19 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -9343,7 +8691,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select t.i from t where t.i NOT IN (select t1.i from t t1 ) PREHOOK: type: QUERY PREHOOK: Input: default@t diff --git ql/src/test/results/clientpositive/llap/subquery_null_agg.q.out ql/src/test/results/clientpositive/llap/subquery_null_agg.q.out index 7d9d77c..3591c44 100644 --- ql/src/test/results/clientpositive/llap/subquery_null_agg.q.out +++ ql/src/test/results/clientpositive/llap/subquery_null_agg.q.out @@ -105,14 +105,14 @@ STAGE PLANS: predicate: false (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - keys: false (type: boolean) + keys: false (type: boolean), true (type: boolean) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 
Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) + key expressions: _col0 (type: boolean), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: boolean), _col1 (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs @@ -140,10 +140,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col3 + outputColumnNames: _col1, _col4 Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 is null or (_col1 = 0)) (type: boolean) + predicate: (_col4 is null or (_col1 = 0)) (type: boolean) Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: null (type: double) @@ -179,18 +179,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: boolean) + keys: KEY._col0 (type: boolean), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: true (type: boolean) - outputColumnNames: _col0 + expressions: _col1 (type: boolean) + outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - value expressions: _col0 (type: boolean) + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/subquery_scalar.q.out ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index a1a74a7..3be4b6a 100644 --- ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -399,7 +399,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_name = (select p_name from part_null where p_name is null) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_name = (select p_name from part_null where p_name is null) @@ -413,8 +413,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -422,20 +423,17 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (p_name = null) (type: boolean) - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: part_null @@ -456,7 +454,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: part_null @@ -467,7 +465,9 @@ STAGE PLANS: Select Operator Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: null (type: string) + sort order: + + Map-reduce partition columns: null (type: string) Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs @@ -477,25 +477,40 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 16328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 null (type: string) outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 16 Data size: 11084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 16 Data size: 11084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 11084 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -518,7 +533,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part where p_name = (select p_name from part_null where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -4223,9 +4238,9 @@ POSTHOOK: Input: default@part 65667 144293 15103 -Warning: Shuffle Join MERGEJOIN[53][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[54][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[54][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[56][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part) @@ -4289,16 +4304,20 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_name (type: string) - mode: hash + Select Operator + expressions: p_name (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 @@ -4357,20 +4376,16 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -4492,9 +4507,9 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[53][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[54][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[54][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[56][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part) PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git ql/src/test/results/clientpositive/llap/table_nonprintable.q.out ql/src/test/results/clientpositive/llap/table_nonprintable.q.out index c9b962e..d7c93f2 100644 --- ql/src/test/results/clientpositive/llap/table_nonprintable.q.out +++ ql/src/test/results/clientpositive/llap/table_nonprintable.q.out @@ -20,10 +20,8 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@table_external PREHOOK: query: msck repair table table_external PREHOOK: type: MSCK -PREHOOK: Output: default@table_external POSTHOOK: query: msck repair table table_external POSTHOOK: type: MSCK -POSTHOOK: Output: default@table_external Partitions not in metastore: table_external:day=¢Bar Repair: Cannot add partition table_external:day=Foo due to invalid characters in the name Repair: Added partition to metastore table_external:day=¢Bar diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index 7de04a7..9fbce7d 100644 --- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -151,7 +151,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map 3 Map Operator Tree: @@ -171,7 +171,7 @@ STAGE PLANS: Map-reduce partition columns: _col10 (type: binary) Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: 
vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index a0a3393..c27668f 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain select * from src @@ -30,32 +30,32 @@ Stage-0 Select Operator [SEL_36] (rows=500 width=178) Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - Select Operator [SEL_21] (rows=500 width=178) + SHUFFLE [RS_23] + Select Operator [SEL_22] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_20] (rows=500 width=198) + Filter Operator [FIL_21] (rows=500 width=198) predicate:((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) - Map Join Operator [MAPJOIN_28] (rows=500 width=198) - Conds:MAPJOIN_27._col0=RS_35._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"] + Map Join Operator [MAPJOIN_29] (rows=500 width=198) + Conds:MAPJOIN_28._col0=RS_35._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"] <-Reducer 6 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_35] PartitionCols:_col0 - Select Operator [SEL_34] (rows=205 width=91) - Output:["_col0","_col1"] - Group By Operator [GBY_33] (rows=205 width=87) - Output:["_col0"],keys:KEY._col0 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - PartitionCols:_col0 - Group By Operator [GBY_10] (rows=205 width=87) - Output:["_col0"],keys:key + Group By Operator [GBY_34] (rows=205 width=91) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_11] (rows=205 width=91) + Output:["_col0","_col1"],keys:_col0, true + Select Operator [SEL_9] (rows=500 width=87) + Output:["_col0"] TableScan [TS_8] (rows=500 width=87) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map Join Operator [MAPJOIN_27] (rows=500 width=194) + <-Map Join Operator [MAPJOIN_28] (rows=500 width=194) Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 4 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_32] - Group By Operator [GBY_31] (rows=1 width=16) + BROADCAST [RS_33] + Group By Operator [GBY_32] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"] <-Map 3 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_5] @@ -70,7 +70,7 @@ Stage-0 TableScan [TS_0] (rows=500 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=?] 
in task 'Map 1' is a cross product PREHOOK: query: select * from src where not key in diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index d0efdb0..6ddfd76 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -49,7 +49,7 @@ STAGE PLANS: alias: li Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) @@ -69,7 +69,7 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int) diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 203ded8..3d087b3 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -2958,7 +2958,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -3024,7 +3024,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) @@ -3081,7 +3081,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) @@ -3176,19 +3175,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: 
COMPLETE Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -3225,19 +3221,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Union 6 Vertex: Union 6 @@ -3288,7 +3281,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) @@ -3385,19 +3377,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -3434,19 +3423,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Union 6 Vertex: Union 6 @@ -3499,7 +3485,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: ds (type: string) @@ -3537,7 +3522,6 @@ STAGE PLANS: Map Operator Tree: 
TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: ds (type: string) @@ -3622,19 +3606,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: @@ -3687,19 +3668,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Union 3 Vertex: Union 3 Union 9 @@ -5321,7 +5299,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) @@ -5382,19 +5359,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -5446,19 +5420,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Union 4 Vertex: Union 4 diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out deleted file mode 100644 index 29f2391..0000000 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ /dev/null @@ -1,932 +0,0 @@ -PREHOOK: query: create table dsrv_big stored as orc as select key as key_str, cast(key as int) as key_int, value from src -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@dsrv_big -POSTHOOK: query: create table dsrv_big stored as orc as select key as key_str, cast(key as int) as key_int, value from src -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dsrv_big -POSTHOOK: Lineage: dsrv_big.key_int EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dsrv_big.key_str SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dsrv_big.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table dsrv_small stored as orc as select distinct key as key_str, cast(key as int) as key_int, value from src where key < 100 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@dsrv_small -POSTHOOK: query: create table dsrv_small stored as orc as select distinct key as key_str, cast(key as int) as key_int, value from src where key < 100 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dsrv_small -POSTHOOK: Lineage: dsrv_small.key_int EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dsrv_small.key_str SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dsrv_small.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) -#### A 
masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key_int is not null and key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key_int is not null and key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_int (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: key_int is not null (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key_int is not null (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_int (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) -PREHOOK: type: QUERY -PREHOOK: Input: default@dsrv_big -PREHOOK: Input: default@dsrv_small -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dsrv_big -POSTHOOK: Input: default@dsrv_small -#### A masked pattern was here #### -84 -PREHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key_str is not null and key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key_str is not null and key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_str (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: key_str is not null (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key_str is not null (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_str (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - 
Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 550 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) -PREHOOK: type: QUERY -PREHOOK: Input: default@dsrv_big -PREHOOK: Input: default@dsrv_small -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dsrv_big -POSTHOOK: Input: default@dsrv_small -#### A masked pattern was here #### -84 -PREHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) 
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key_str is not null and key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key_str is not null and key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_str (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: key_str is not null (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key_str is not null (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_str (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 550 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 
Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_str) -PREHOOK: type: QUERY -PREHOOK: Input: default@dsrv_big -PREHOOK: Input: default@dsrv_small -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_str) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dsrv_big -POSTHOOK: Input: default@dsrv_small -#### A masked pattern was here #### -84 -PREHOOK: query: EXPLAIN select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key_int is not null and key_int BETWEEN DynamicValue(RS_10_b_key_int_min) AND DynamicValue(RS_10_b_key_int_max) and key_int BETWEEN DynamicValue(RS_11_c_key_int_min) AND DynamicValue(RS_11_c_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_10_b_key_int_bloom_filter)) and in_bloom_filter(key_int, DynamicValue(RS_11_c_key_int_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key_int is not null and key_int BETWEEN DynamicValue(RS_10_b_key_int_min) AND DynamicValue(RS_10_b_key_int_max) and key_int BETWEEN DynamicValue(RS_11_c_key_int_min) AND DynamicValue(RS_11_c_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_10_b_key_int_bloom_filter)) and in_bloom_filter(key_int, DynamicValue(RS_11_c_key_int_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_int (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Execution mode: 
vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: key_int is not null (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key_int is not null (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_int (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 6 - Map Operator Tree: - TableScan - alias: c - filterExpr: key_int is not null (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key_int is not null (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_int (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Statistics: Num rows: 1100 Data size: 198000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial 
- outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 7 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int -PREHOOK: type: QUERY -PREHOOK: Input: default@dsrv_big -PREHOOK: Input: default@dsrv_small -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dsrv_big -POSTHOOK: Input: default@dsrv_small -#### A masked pattern was here #### -84 -PREHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key_str is not null and key_int is not null and key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter)) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key_str is not null and key_int is not null and key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND 
DynamicValue(RS_7_b_key_str_max) and key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter)) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_str (type: string), key_int (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: (key_str is not null and key_int is not null) (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key_str is not null and key_int is not null) (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_str (type: string), key_int (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: int) - 1 _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 550 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int) -PREHOOK: type: QUERY -PREHOOK: Input: default@dsrv_big -PREHOOK: Input: default@dsrv_small -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dsrv_big -POSTHOOK: Input: default@dsrv_small -#### A masked pattern was here #### -84 -PREHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2') -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2') -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key_int is not null and key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key_int is not null and key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and 
in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) (type: boolean) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_int (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - filterExpr: ((value) IN ('nonexistent1', 'nonexistent2') and key_int is not null) (type: boolean) - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value) IN ('nonexistent1', 'nonexistent2') and key_int is not null) (type: boolean) - Statistics: Num rows: 29 Data size: 5162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key_int (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 29 Data size: 5162 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 29 Data size: 5162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 29 Data size: 5162 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=29) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=29) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2') -PREHOOK: type: QUERY -PREHOOK: Input: default@dsrv_big -PREHOOK: Input: default@dsrv_small -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dsrv_big -POSTHOOK: Input: default@dsrv_small -#### A masked pattern was here #### -0 -PREHOOK: query: drop table dsrv_big -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@dsrv_big -PREHOOK: Output: default@dsrv_big -POSTHOOK: query: drop table dsrv_big -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@dsrv_big -POSTHOOK: Output: default@dsrv_big -PREHOOK: query: drop table dsrv_small -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@dsrv_small -PREHOOK: Output: default@dsrv_small -POSTHOOK: query: drop table dsrv_small -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@dsrv_small -POSTHOOK: Output: default@dsrv_small diff --git ql/src/test/results/clientpositive/msck_repair_0.q.out ql/src/test/results/clientpositive/msck_repair_0.q.out index 3f2fe75..c394f9b 100644 --- ql/src/test/results/clientpositive/msck_repair_0.q.out +++ ql/src/test/results/clientpositive/msck_repair_0.q.out @@ -12,31 +12,23 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE default.repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable Partitions not in metastore: repairtable:p1=c/p2=a PREHOOK: query: MSCK REPAIR TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK REPAIR TABLE default.repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable Partitions not in metastore: repairtable:p1=c/p2=a Repair: Added partition to metastore default.repairtable:p1=c/p2=a PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: DROP TABLE default.repairtable PREHOOK: type: DROPTABLE PREHOOK: Input: default@repairtable diff --git ql/src/test/results/clientpositive/msck_repair_1.q.out ql/src/test/results/clientpositive/msck_repair_1.q.out index 3f2fe75..c394f9b 100644 --- ql/src/test/results/clientpositive/msck_repair_1.q.out +++ ql/src/test/results/clientpositive/msck_repair_1.q.out @@ -12,31 +12,23 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: 
MSCK TABLE default.repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable Partitions not in metastore: repairtable:p1=c/p2=a PREHOOK: query: MSCK REPAIR TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK REPAIR TABLE default.repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable Partitions not in metastore: repairtable:p1=c/p2=a Repair: Added partition to metastore default.repairtable:p1=c/p2=a PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: DROP TABLE default.repairtable PREHOOK: type: DROPTABLE PREHOOK: Input: default@repairtable diff --git ql/src/test/results/clientpositive/msck_repair_2.q.out ql/src/test/results/clientpositive/msck_repair_2.q.out index 3f2fe75..c394f9b 100644 --- ql/src/test/results/clientpositive/msck_repair_2.q.out +++ ql/src/test/results/clientpositive/msck_repair_2.q.out @@ -12,31 +12,23 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE default.repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable Partitions not in metastore: repairtable:p1=c/p2=a PREHOOK: query: MSCK REPAIR TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK REPAIR TABLE default.repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable Partitions not in metastore: repairtable:p1=c/p2=a Repair: Added partition to metastore default.repairtable:p1=c/p2=a PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: DROP TABLE default.repairtable PREHOOK: type: DROPTABLE PREHOOK: Input: default@repairtable diff --git ql/src/test/results/clientpositive/msck_repair_3.q.out ql/src/test/results/clientpositive/msck_repair_3.q.out index 3f2fe75..c394f9b 100644 --- ql/src/test/results/clientpositive/msck_repair_3.q.out +++ ql/src/test/results/clientpositive/msck_repair_3.q.out @@ -12,31 +12,23 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE default.repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable Partitions not in metastore: repairtable:p1=c/p2=a PREHOOK: query: MSCK REPAIR TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK REPAIR TABLE default.repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable Partitions not in metastore: repairtable:p1=c/p2=a Repair: Added partition to metastore default.repairtable:p1=c/p2=a PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK 
TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: DROP TABLE default.repairtable PREHOOK: type: DROPTABLE PREHOOK: Input: default@repairtable diff --git ql/src/test/results/clientpositive/msck_repair_batchsize.q.out ql/src/test/results/clientpositive/msck_repair_batchsize.q.out index a0180b7..0300f20 100644 --- ql/src/test/results/clientpositive/msck_repair_batchsize.q.out +++ ql/src/test/results/clientpositive/msck_repair_batchsize.q.out @@ -12,33 +12,25 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE default.repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable Partitions not in metastore: repairtable:p1=a/p2=a repairtable:p1=b/p2=a repairtable:p1=c/p2=a PREHOOK: query: MSCK REPAIR TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK REPAIR TABLE default.repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable Partitions not in metastore: repairtable:p1=a/p2=a repairtable:p1=b/p2=a repairtable:p1=c/p2=a Repair: Added partition to metastore default.repairtable:p1=a/p2=a Repair: Added partition to metastore default.repairtable:p1=b/p2=a Repair: Added partition to metastore default.repairtable:p1=c/p2=a PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: DROP TABLE default.repairtable PREHOOK: type: DROPTABLE PREHOOK: Input: default@repairtable diff --git ql/src/test/results/clientpositive/partition_coltype_literals.q.out ql/src/test/results/clientpositive/partition_coltype_literals.q.out index fad937d..61d6423 100644 --- ql/src/test/results/clientpositive/partition_coltype_literals.q.out +++ ql/src/test/results/clientpositive/partition_coltype_literals.q.out @@ -355,7 +355,7 @@ Database: default Table: partcoltypenum #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"value\":\"true\"}} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} #### A masked pattern was here #### numFiles 2 numRows 30 @@ -404,7 +404,7 @@ Database: default Table: partcoltypenum #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"value\":\"true\"}} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} #### A masked pattern was here #### numFiles 2 numRows 30 diff --git ql/src/test/results/clientpositive/perf/query14.q.out ql/src/test/results/clientpositive/perf/query14.q.out index 21cabbc..171a22a 100644 --- ql/src/test/results/clientpositive/perf/query14.q.out +++ ql/src/test/results/clientpositive/perf/query14.q.out @@ -1,9 +1,9 @@ Warning: Shuffle Join MERGEJOIN[916][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 114' is a cross product Warning: Shuffle Join MERGEJOIN[917][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 115' is a cross product -Warning: Shuffle Join MERGEJOIN[912][tables = [$hdt$_1, $hdt$_2]] in 
Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[913][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product Warning: Shuffle Join MERGEJOIN[914][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 61' is a cross product Warning: Shuffle Join MERGEJOIN[915][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 62' is a cross product +Warning: Shuffle Join MERGEJOIN[912][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[913][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain with cross_items as (select i_item_sk ss_item_sk @@ -570,7 +570,7 @@ Stage-0 Select Operator [SEL_486] (rows=462000 width=1436) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_857] (rows=462000 width=1436) - predicate:(i_brand_id is not null and i_class_id is not null and i_category_id is not null and i_item_sk is not null) + predicate:(i_brand_id is not null and i_class_id is not null and i_category_id is not null) TableScan [TS_484] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id"] <-Reducer 148 [SIMPLE_EDGE] @@ -801,7 +801,7 @@ Stage-0 Select Operator [SEL_94] (rows=462000 width=1436) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_807] (rows=462000 width=1436) - predicate:(i_brand_id is not null and i_class_id is not null and i_category_id is not null and i_item_sk is not null) + predicate:(i_brand_id is not null and i_class_id is not null and i_category_id is not null) TableScan [TS_92] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id"] <-Reducer 42 [SIMPLE_EDGE] @@ -1408,7 +1408,7 @@ Stage-0 Select Operator [SEL_289] (rows=462000 width=1436) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_832] (rows=462000 width=1436) - predicate:(i_brand_id is not null and i_class_id is not null and i_category_id is not null and i_item_sk is not null) + predicate:(i_brand_id is not null and i_class_id is not null and i_category_id is not null) TableScan [TS_287] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id"] <-Reducer 95 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/query23.q.out ql/src/test/results/clientpositive/perf/query23.q.out index 85cee23..c8f3b85 100644 --- ql/src/test/results/clientpositive/perf/query23.q.out +++ ql/src/test/results/clientpositive/perf/query23.q.out @@ -180,7 +180,7 @@ Stage-0 Select Operator [SEL_127] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_346] (rows=144002668 width=135) - predicate:(ws_item_sk is not null and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + predicate:ws_sold_date_sk is not null TableScan [TS_125] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] <-Map 39 [SIMPLE_EDGE] @@ -642,7 +642,7 @@ Stage-0 Select Operator [SEL_2] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_333] (rows=287989836 width=135) - predicate:(cs_item_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + predicate:cs_sold_date_sk is not null TableScan [TS_0] (rows=287989836 width=135) 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] <-Map 7 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/query33.q.out ql/src/test/results/clientpositive/perf/query33.q.out index 342bd90..89660e3 100644 --- ql/src/test/results/clientpositive/perf/query33.q.out +++ ql/src/test/results/clientpositive/perf/query33.q.out @@ -208,7 +208,7 @@ Stage-0 Select Operator [SEL_39] (rows=462000 width=1436) Output:["_col0","_col1"] Filter Operator [FIL_164] (rows=462000 width=1436) - predicate:(i_manufact_id is not null and i_item_sk is not null) + predicate:i_item_sk is not null TableScan [TS_37] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"] <-Reducer 20 [SIMPLE_EDGE] @@ -224,7 +224,7 @@ Stage-0 Select Operator [SEL_42] (rows=231000 width=1436) Output:["i_manufact_id"] Filter Operator [FIL_165] (rows=231000 width=1436) - predicate:((i_category) IN ('Books') and i_manufact_id is not null) + predicate:(i_category) IN ('Books') TableScan [TS_40] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_category","i_manufact_id"] <-Reducer 23 [SIMPLE_EDGE] @@ -291,7 +291,7 @@ Stage-0 Select Operator [SEL_78] (rows=462000 width=1436) Output:["_col0","_col1"] Filter Operator [FIL_169] (rows=462000 width=1436) - predicate:(i_manufact_id is not null and i_item_sk is not null) + predicate:i_item_sk is not null TableScan [TS_76] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"] <-Reducer 31 [SIMPLE_EDGE] @@ -307,7 +307,7 @@ Stage-0 Select Operator [SEL_81] (rows=231000 width=1436) Output:["i_manufact_id"] Filter Operator [FIL_170] (rows=231000 width=1436) - predicate:((i_category) IN ('Books') and i_manufact_id is not null) + predicate:(i_category) IN ('Books') TableScan [TS_79] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_category","i_manufact_id"] <-Reducer 34 [SIMPLE_EDGE] @@ -413,7 +413,7 @@ Stage-0 Select Operator [SEL_2] (rows=462000 width=1436) Output:["_col0","_col1"] Filter Operator [FIL_159] (rows=462000 width=1436) - predicate:(i_manufact_id is not null and i_item_sk is not null) + predicate:i_item_sk is not null TableScan [TS_0] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"] <-Reducer 9 [SIMPLE_EDGE] @@ -429,7 +429,7 @@ Stage-0 Select Operator [SEL_5] (rows=231000 width=1436) Output:["i_manufact_id"] Filter Operator [FIL_160] (rows=231000 width=1436) - predicate:((i_category) IN ('Books') and i_manufact_id is not null) + predicate:(i_category) IN ('Books') TableScan [TS_3] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_category","i_manufact_id"] diff --git ql/src/test/results/clientpositive/perf/query56.q.out ql/src/test/results/clientpositive/perf/query56.q.out index 4fa28c2..2db6bfb 100644 --- ql/src/test/results/clientpositive/perf/query56.q.out +++ ql/src/test/results/clientpositive/perf/query56.q.out @@ -194,7 +194,7 @@ Stage-0 Select Operator [SEL_39] (rows=462000 width=1436) Output:["_col0","_col1"] Filter Operator [FIL_164] (rows=462000 width=1436) - predicate:(i_item_id is not null and i_item_sk is not null) + predicate:i_item_sk is not null TableScan [TS_37] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] <-Reducer 20 [SIMPLE_EDGE] @@ -210,7 +210,7 @@ Stage-0 Select Operator [SEL_42] 
(rows=231000 width=1436) Output:["i_item_id"] Filter Operator [FIL_165] (rows=231000 width=1436) - predicate:((i_color) IN ('orchid', 'chiffon', 'lace') and i_item_id is not null) + predicate:(i_color) IN ('orchid', 'chiffon', 'lace') TableScan [TS_40] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_color"] <-Reducer 23 [SIMPLE_EDGE] @@ -277,7 +277,7 @@ Stage-0 Select Operator [SEL_78] (rows=462000 width=1436) Output:["_col0","_col1"] Filter Operator [FIL_169] (rows=462000 width=1436) - predicate:(i_item_id is not null and i_item_sk is not null) + predicate:i_item_sk is not null TableScan [TS_76] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] <-Reducer 31 [SIMPLE_EDGE] @@ -293,7 +293,7 @@ Stage-0 Select Operator [SEL_81] (rows=231000 width=1436) Output:["i_item_id"] Filter Operator [FIL_170] (rows=231000 width=1436) - predicate:((i_color) IN ('orchid', 'chiffon', 'lace') and i_item_id is not null) + predicate:(i_color) IN ('orchid', 'chiffon', 'lace') TableScan [TS_79] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_color"] <-Reducer 34 [SIMPLE_EDGE] @@ -399,7 +399,7 @@ Stage-0 Select Operator [SEL_2] (rows=462000 width=1436) Output:["_col0","_col1"] Filter Operator [FIL_159] (rows=462000 width=1436) - predicate:(i_item_id is not null and i_item_sk is not null) + predicate:i_item_sk is not null TableScan [TS_0] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] <-Reducer 9 [SIMPLE_EDGE] @@ -415,7 +415,7 @@ Stage-0 Select Operator [SEL_5] (rows=231000 width=1436) Output:["i_item_id"] Filter Operator [FIL_160] (rows=231000 width=1436) - predicate:((i_color) IN ('orchid', 'chiffon', 'lace') and i_item_id is not null) + predicate:(i_color) IN ('orchid', 'chiffon', 'lace') TableScan [TS_3] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_color"] diff --git ql/src/test/results/clientpositive/perf/query60.q.out ql/src/test/results/clientpositive/perf/query60.q.out index ad9d08e..3b68c44 100644 --- ql/src/test/results/clientpositive/perf/query60.q.out +++ ql/src/test/results/clientpositive/perf/query60.q.out @@ -214,7 +214,7 @@ Stage-0 Select Operator [SEL_39] (rows=462000 width=1436) Output:["_col0","_col1"] Filter Operator [FIL_164] (rows=462000 width=1436) - predicate:(i_item_id is not null and i_item_sk is not null) + predicate:i_item_sk is not null TableScan [TS_37] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] <-Reducer 20 [SIMPLE_EDGE] @@ -230,7 +230,7 @@ Stage-0 Select Operator [SEL_42] (rows=231000 width=1436) Output:["i_item_id"] Filter Operator [FIL_165] (rows=231000 width=1436) - predicate:((i_category) IN ('Children') and i_item_id is not null) + predicate:(i_category) IN ('Children') TableScan [TS_40] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_category"] <-Reducer 23 [SIMPLE_EDGE] @@ -297,7 +297,7 @@ Stage-0 Select Operator [SEL_78] (rows=462000 width=1436) Output:["_col0","_col1"] Filter Operator [FIL_169] (rows=462000 width=1436) - predicate:(i_item_id is not null and i_item_sk is not null) + predicate:i_item_sk is not null TableScan [TS_76] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] <-Reducer 31 [SIMPLE_EDGE] @@ -313,7 +313,7 @@ Stage-0 Select Operator [SEL_81] (rows=231000 width=1436) Output:["i_item_id"] Filter 
Operator [FIL_170] (rows=231000 width=1436) - predicate:((i_category) IN ('Children') and i_item_id is not null) + predicate:(i_category) IN ('Children') TableScan [TS_79] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_category"] <-Reducer 34 [SIMPLE_EDGE] @@ -419,7 +419,7 @@ Stage-0 Select Operator [SEL_2] (rows=462000 width=1436) Output:["_col0","_col1"] Filter Operator [FIL_159] (rows=462000 width=1436) - predicate:(i_item_id is not null and i_item_sk is not null) + predicate:i_item_sk is not null TableScan [TS_0] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] <-Reducer 9 [SIMPLE_EDGE] @@ -435,7 +435,7 @@ Stage-0 Select Operator [SEL_5] (rows=231000 width=1436) Output:["i_item_id"] Filter Operator [FIL_160] (rows=231000 width=1436) - predicate:((i_category) IN ('Children') and i_item_id is not null) + predicate:(i_category) IN ('Children') TableScan [TS_3] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_category"] diff --git ql/src/test/results/clientpositive/perf/query70.q.out ql/src/test/results/clientpositive/perf/query70.q.out index d0900a8..b4eaee6 100644 --- ql/src/test/results/clientpositive/perf/query70.q.out +++ ql/src/test/results/clientpositive/perf/query70.q.out @@ -159,7 +159,7 @@ Stage-0 Select Operator [SEL_17] (rows=1704 width=1910) Output:["_col0","_col1"] Filter Operator [FIL_87] (rows=1704 width=1910) - predicate:(s_store_sk is not null and s_state is not null) + predicate:s_store_sk is not null TableScan [TS_15] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] <-Reducer 11 [SIMPLE_EDGE] @@ -196,7 +196,7 @@ Stage-0 Select Operator [SEL_8] (rows=1704 width=1910) Output:["_col0","_col1","_col2"] Filter Operator [FIL_83] (rows=1704 width=1910) - predicate:(s_state is not null and s_store_sk is not null) + predicate:s_store_sk is not null TableScan [TS_6] (rows=1704 width=1910) default@store,s,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county","s_state"] <-Reducer 2 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/query83.q.out ql/src/test/results/clientpositive/perf/query83.q.out index 9960bc7..ee448d4 100644 --- ql/src/test/results/clientpositive/perf/query83.q.out +++ ql/src/test/results/clientpositive/perf/query83.q.out @@ -160,279 +160,279 @@ Stage-0 limit:100 Stage-1 Reducer 6 - File Output Operator [FS_134] - Limit [LIM_133] (rows=100 width=77) + File Output Operator [FS_137] + Limit [LIM_136] (rows=100 width=77) Number of rows:100 - Select Operator [SEL_132] (rows=76653825 width=77) + Select Operator [SEL_135] (rows=76653825 width=77) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_131] - Select Operator [SEL_130] (rows=76653825 width=77) + SHUFFLE [RS_134] + Select Operator [SEL_133] (rows=76653825 width=77) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_228] (rows=76653825 width=77) - Conds:RS_126._col0=RS_127._col0(Inner),RS_126._col0=RS_128._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + Merge Join Operator [MERGEJOIN_231] (rows=76653825 width=77) + Conds:RS_129._col0=RS_130._col0(Inner),RS_129._col0=RS_131._col0(Inner),Output:["_col0","_col1","_col3","_col5"] <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_127] + SHUFFLE [RS_130] PartitionCols:_col0 - Group By Operator [GBY_82] (rows=34842647 width=77) + Group By Operator [GBY_84] 
(rows=34842647 width=77) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_81] + SHUFFLE [RS_83] PartitionCols:_col0 - Group By Operator [GBY_80] (rows=69685294 width=77) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Select Operator [SEL_79] (rows=69685294 width=77) - Output:["_col4","_col2"] - Merge Join Operator [MERGEJOIN_226] (rows=69685294 width=77) - Conds:RS_76._col0=RS_77._col0(Inner),Output:["_col2","_col4"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_76] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_219] (rows=63350266 width=77) - Conds:RS_73._col1=RS_74._col0(Inner),Output:["_col0","_col2","_col4"] - <-Map 15 [SIMPLE_EDGE] - SHUFFLE [RS_73] - PartitionCols:_col1 - Select Operator [SEL_44] (rows=57591150 width=77) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_206] (rows=57591150 width=77) - predicate:(sr_item_sk is not null and sr_returned_date_sk is not null) - TableScan [TS_42] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_return_quantity"] - <-Map 19 [SIMPLE_EDGE] - SHUFFLE [RS_74] - PartitionCols:_col0 - Select Operator [SEL_47] (rows=462000 width=1436) - Output:["_col0","_col1"] - Filter Operator [FIL_207] (rows=462000 width=1436) - predicate:(i_item_sk is not null and i_item_id is not null) - TableScan [TS_45] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_77] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_221] (rows=80353 width=1119) - Conds:RS_69._col1=RS_70._col0(Inner),Output:["_col0"] - <-Map 20 [SIMPLE_EDGE] - SHUFFLE [RS_69] - PartitionCols:_col1 - Select Operator [SEL_50] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_208] (rows=73049 width=1119) - predicate:(d_date is not null and d_date_sk is not null) - TableScan [TS_48] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_70] - PartitionCols:_col0 - Group By Operator [GBY_67] (rows=40176 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_66] - PartitionCols:_col0 - Group By Operator [GBY_65] (rows=80353 width=1119) - Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_220] (rows=80353 width=1119) - Conds:RS_61._col1=RS_62._col0(Inner),Output:["_col0"] - <-Map 22 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col1 - Select Operator [SEL_53] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_209] (rows=73049 width=1119) - predicate:(d_week_seq is not null and d_date is not null) - TableScan [TS_51] (rows=73049 width=1119) + Group By Operator [GBY_82] (rows=69685294 width=77) + Output:["_col0","_col1"],aggregations:["sum(_col0)"],keys:_col1 + Filter Operator [FIL_80] (rows=69685294 width=77) + predicate:_col1 is not null + Select Operator [SEL_79] (rows=69685294 width=77) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_229] (rows=69685294 width=77) + Conds:RS_76._col0=RS_77._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_76] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_222] (rows=63350266 width=77) + Conds:RS_73._col1=RS_74._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_73] + PartitionCols:_col1 + Select Operator [SEL_45] (rows=57591150 
width=77) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_209] (rows=57591150 width=77) + predicate:(sr_item_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_43] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_return_quantity"] + <-Map 19 [SIMPLE_EDGE] + SHUFFLE [RS_74] + PartitionCols:_col0 + Select Operator [SEL_48] (rows=462000 width=1436) + Output:["_col0","_col1"] + Filter Operator [FIL_210] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_46] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_224] (rows=80353 width=1119) + Conds:RS_69._col1=RS_70._col0(Inner),Output:["_col0"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col1 + Select Operator [SEL_51] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_211] (rows=73049 width=1119) + predicate:d_date_sk is not null + TableScan [TS_49] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col0 + Group By Operator [GBY_67] (rows=40176 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0 + Group By Operator [GBY_65] (rows=80353 width=1119) + Output:["_col0"],keys:_col0 + Merge Join Operator [MERGEJOIN_223] (rows=80353 width=1119) + Conds:RS_61._col1=RS_62._col0(Inner),Output:["_col0"] + <-Map 22 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col1 + Select Operator [SEL_53] (rows=73049 width=1119) + Output:["_col0","_col1"] + TableScan [TS_52] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_62] - PartitionCols:_col0 - Group By Operator [GBY_59] (rows=18262 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Map 25 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col0 - Group By Operator [GBY_57] (rows=36525 width=1119) - Output:["_col0"],keys:d_week_seq - Select Operator [SEL_56] (rows=36525 width=1119) - Output:["d_week_seq"] - Filter Operator [FIL_210] (rows=36525 width=1119) - predicate:((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) - TableScan [TS_54] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col0 + Group By Operator [GBY_59] (rows=18262 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 25 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0 + Group By Operator [GBY_57] (rows=36525 width=1119) + Output:["_col0"],keys:d_week_seq + Select Operator [SEL_56] (rows=36525 width=1119) + Output:["d_week_seq"] + Filter Operator [FIL_213] (rows=36525 width=1119) + predicate:(d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') + TableScan [TS_54] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_128] + SHUFFLE [RS_131] PartitionCols:_col0 - Group By Operator [GBY_124] (rows=8711072 width=92) + Group By Operator [GBY_127] (rows=8711072 width=92) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_123] + SHUFFLE [RS_126] 
PartitionCols:_col0 - Group By Operator [GBY_122] (rows=17422145 width=92) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Select Operator [SEL_121] (rows=17422145 width=92) - Output:["_col4","_col2"] - Merge Join Operator [MERGEJOIN_227] (rows=17422145 width=92) - Conds:RS_118._col0=RS_119._col0(Inner),Output:["_col2","_col4"] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_118] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_222] (rows=15838314 width=92) - Conds:RS_115._col1=RS_116._col0(Inner),Output:["_col0","_col2","_col4"] - <-Map 27 [SIMPLE_EDGE] - SHUFFLE [RS_115] - PartitionCols:_col1 - Select Operator [SEL_86] (rows=14398467 width=92) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_211] (rows=14398467 width=92) - predicate:(wr_item_sk is not null and wr_returned_date_sk is not null) - TableScan [TS_84] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_item_sk","wr_return_quantity"] - <-Map 31 [SIMPLE_EDGE] - SHUFFLE [RS_116] - PartitionCols:_col0 - Select Operator [SEL_89] (rows=462000 width=1436) - Output:["_col0","_col1"] - Filter Operator [FIL_212] (rows=462000 width=1436) - predicate:(i_item_sk is not null and i_item_id is not null) - TableScan [TS_87] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] - <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_119] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_224] (rows=80353 width=1119) - Conds:RS_111._col1=RS_112._col0(Inner),Output:["_col0"] - <-Map 32 [SIMPLE_EDGE] - SHUFFLE [RS_111] - PartitionCols:_col1 - Select Operator [SEL_92] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_213] (rows=73049 width=1119) - predicate:(d_date is not null and d_date_sk is not null) - TableScan [TS_90] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Reducer 36 [SIMPLE_EDGE] - SHUFFLE [RS_112] - PartitionCols:_col0 - Group By Operator [GBY_109] (rows=40176 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Reducer 35 [SIMPLE_EDGE] - SHUFFLE [RS_108] - PartitionCols:_col0 - Group By Operator [GBY_107] (rows=80353 width=1119) - Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_223] (rows=80353 width=1119) - Conds:RS_103._col1=RS_104._col0(Inner),Output:["_col0"] - <-Map 34 [SIMPLE_EDGE] - SHUFFLE [RS_103] - PartitionCols:_col1 - Select Operator [SEL_95] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_214] (rows=73049 width=1119) - predicate:(d_week_seq is not null and d_date is not null) - TableScan [TS_93] (rows=73049 width=1119) + Group By Operator [GBY_125] (rows=17422145 width=92) + Output:["_col0","_col1"],aggregations:["sum(_col0)"],keys:_col1 + Filter Operator [FIL_123] (rows=17422145 width=92) + predicate:_col1 is not null + Select Operator [SEL_122] (rows=17422145 width=92) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_230] (rows=17422145 width=92) + Conds:RS_119._col0=RS_120._col0(Inner),Output:["_col2","_col4"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_119] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_225] (rows=15838314 width=92) + Conds:RS_116._col1=RS_117._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 27 [SIMPLE_EDGE] + SHUFFLE [RS_116] + PartitionCols:_col1 + Select Operator [SEL_88] (rows=14398467 width=92) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_214] (rows=14398467 width=92) + predicate:(wr_item_sk is not null and 
wr_returned_date_sk is not null) + TableScan [TS_86] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_item_sk","wr_return_quantity"] + <-Map 31 [SIMPLE_EDGE] + SHUFFLE [RS_117] + PartitionCols:_col0 + Select Operator [SEL_91] (rows=462000 width=1436) + Output:["_col0","_col1"] + Filter Operator [FIL_215] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_89] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + <-Reducer 33 [SIMPLE_EDGE] + SHUFFLE [RS_120] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_227] (rows=80353 width=1119) + Conds:RS_112._col1=RS_113._col0(Inner),Output:["_col0"] + <-Map 32 [SIMPLE_EDGE] + SHUFFLE [RS_112] + PartitionCols:_col1 + Select Operator [SEL_94] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_216] (rows=73049 width=1119) + predicate:d_date_sk is not null + TableScan [TS_92] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_113] + PartitionCols:_col0 + Group By Operator [GBY_110] (rows=40176 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_109] + PartitionCols:_col0 + Group By Operator [GBY_108] (rows=80353 width=1119) + Output:["_col0"],keys:_col0 + Merge Join Operator [MERGEJOIN_226] (rows=80353 width=1119) + Conds:RS_104._col1=RS_105._col0(Inner),Output:["_col0"] + <-Map 34 [SIMPLE_EDGE] + SHUFFLE [RS_104] + PartitionCols:_col1 + Select Operator [SEL_96] (rows=73049 width=1119) + Output:["_col0","_col1"] + TableScan [TS_95] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] - <-Reducer 38 [SIMPLE_EDGE] - SHUFFLE [RS_104] - PartitionCols:_col0 - Group By Operator [GBY_101] (rows=18262 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Map 37 [SIMPLE_EDGE] - SHUFFLE [RS_100] - PartitionCols:_col0 - Group By Operator [GBY_99] (rows=36525 width=1119) - Output:["_col0"],keys:d_week_seq - Select Operator [SEL_98] (rows=36525 width=1119) - Output:["d_week_seq"] - Filter Operator [FIL_215] (rows=36525 width=1119) - predicate:((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) - TableScan [TS_96] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] + <-Reducer 38 [SIMPLE_EDGE] + SHUFFLE [RS_105] + PartitionCols:_col0 + Group By Operator [GBY_102] (rows=18262 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 37 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col0 + Group By Operator [GBY_100] (rows=36525 width=1119) + Output:["_col0"],keys:d_week_seq + Select Operator [SEL_99] (rows=36525 width=1119) + Output:["d_week_seq"] + Filter Operator [FIL_218] (rows=36525 width=1119) + predicate:(d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') + TableScan [TS_97] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_126] + SHUFFLE [RS_129] PartitionCols:_col0 - Group By Operator [GBY_40] (rows=17423323 width=106) + Group By Operator [GBY_41] (rows=17423323 width=106) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_39] + SHUFFLE [RS_40] PartitionCols:_col0 - Group By Operator [GBY_38] (rows=34846646 width=106) - 
Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Select Operator [SEL_37] (rows=34846646 width=106) - Output:["_col4","_col2"] - Merge Join Operator [MERGEJOIN_225] (rows=34846646 width=106) - Conds:RS_34._col0=RS_35._col0(Inner),Output:["_col2","_col4"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_216] (rows=31678769 width=106) - Conds:RS_31._col1=RS_32._col0(Inner),Output:["_col0","_col2","_col4"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col1 - Select Operator [SEL_2] (rows=28798881 width=106) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_201] (rows=28798881 width=106) - predicate:(cr_item_sk is not null and cr_returned_date_sk is not null) - TableScan [TS_0] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_item_sk","cr_return_quantity"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=462000 width=1436) - Output:["_col0","_col1"] - Filter Operator [FIL_202] (rows=462000 width=1436) - predicate:(i_item_sk is not null and i_item_id is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_218] (rows=80353 width=1119) - Conds:RS_27._col1=RS_28._col0(Inner),Output:["_col0"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_203] (rows=73049 width=1119) - predicate:(d_date is not null and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col0 - Group By Operator [GBY_25] (rows=40176 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col0 - Group By Operator [GBY_23] (rows=80353 width=1119) - Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_217] (rows=80353 width=1119) - Conds:RS_19._col1=RS_20._col0(Inner),Output:["_col0"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col1 - Select Operator [SEL_11] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_204] (rows=73049 width=1119) - predicate:(d_week_seq is not null and d_date is not null) + Group By Operator [GBY_39] (rows=34846646 width=106) + Output:["_col0","_col1"],aggregations:["sum(_col0)"],keys:_col1 + Filter Operator [FIL_37] (rows=34846646 width=106) + predicate:_col1 is not null + Select Operator [SEL_36] (rows=34846646 width=106) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_228] (rows=34846646 width=106) + Conds:RS_33._col0=RS_34._col0(Inner),Output:["_col2","_col4"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_219] (rows=31678769 width=106) + Conds:RS_30._col1=RS_31._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col1 + Select Operator [SEL_2] (rows=28798881 width=106) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_204] (rows=28798881 width=106) + predicate:(cr_item_sk is not null and cr_returned_date_sk is not null) + TableScan [TS_0] (rows=28798881 width=106) + 
default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_item_sk","cr_return_quantity"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=462000 width=1436) + Output:["_col0","_col1"] + Filter Operator [FIL_205] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_3] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_221] (rows=80353 width=1119) + Conds:RS_26._col1=RS_27._col0(Inner),Output:["_col0"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_206] (rows=73049 width=1119) + predicate:d_date_sk is not null + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Group By Operator [GBY_24] (rows=40176 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0 + Group By Operator [GBY_22] (rows=80353 width=1119) + Output:["_col0"],keys:_col0 + Merge Join Operator [MERGEJOIN_220] (rows=80353 width=1119) + Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col0"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Select Operator [SEL_10] (rows=73049 width=1119) + Output:["_col0","_col1"] TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_20] - PartitionCols:_col0 - Group By Operator [GBY_17] (rows=18262 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=36525 width=1119) - Output:["_col0"],keys:d_week_seq - Select Operator [SEL_14] (rows=36525 width=1119) - Output:["d_week_seq"] - Filter Operator [FIL_205] (rows=36525 width=1119) - predicate:((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=18262 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Group By Operator [GBY_14] (rows=36525 width=1119) + Output:["_col0"],keys:d_week_seq + Select Operator [SEL_13] (rows=36525 width=1119) + Output:["d_week_seq"] + Filter Operator [FIL_208] (rows=36525 width=1119) + predicate:(d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') + TableScan [TS_11] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] diff --git ql/src/test/results/clientpositive/repair.q.out ql/src/test/results/clientpositive/repair.q.out index c183464..86ac031 100644 --- ql/src/test/results/clientpositive/repair.q.out +++ ql/src/test/results/clientpositive/repair.q.out @@ -12,32 +12,24 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE 
default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE default.repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable Partitions not in metastore: repairtable:p1=a/p2=a repairtable:p1=b/p2=a PREHOOK: query: MSCK REPAIR TABLE default.repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK REPAIR TABLE default.repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable Partitions not in metastore: repairtable:p1=a/p2=a repairtable:p1=b/p2=a Repair: Added partition to metastore default.repairtable:p1=a/p2=a Repair: Added partition to metastore default.repairtable:p1=b/p2=a PREHOOK: query: MSCK TABLE repairtable PREHOOK: type: MSCK -PREHOOK: Output: default@repairtable POSTHOOK: query: MSCK TABLE repairtable POSTHOOK: type: MSCK -POSTHOOK: Output: default@repairtable PREHOOK: query: DROP TABLE default.repairtable PREHOOK: type: DROPTABLE PREHOOK: Input: default@repairtable diff --git ql/src/test/results/clientpositive/semijoin4.q.out ql/src/test/results/clientpositive/semijoin4.q.out index d6117ed..89e4023 100644 --- ql/src/test/results/clientpositive/semijoin4.q.out +++ ql/src/test/results/clientpositive/semijoin4.q.out @@ -18,7 +18,6 @@ STORED AS orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@table_18 -Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain SELECT COALESCE(498, @@ -71,41 +70,41 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((-92 = tinyint_col_46) and decimal1309_col_65 is not null and bigint_col_13 is not null) (type: boolean) + predicate: (decimal1309_col_65 is not null and bigint_col_13 is not null and tinyint_col_46 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: bigint_col_13 (type: bigint), smallint_col_24 (type: smallint), double_col_60 (type: double), decimal1309_col_65 (type: decimal(13,9)) - outputColumnNames: _col0, _col1, _col3, _col4 + expressions: bigint_col_13 (type: bigint), smallint_col_24 (type: smallint), tinyint_col_46 (type: tinyint), double_col_60 (type: double), decimal1309_col_65 (type: decimal(13,9)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: decimal(27,9)), _col0 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col4 (type: decimal(27,9)), _col0 (type: bigint) + key expressions: _col2 (type: tinyint), _col4 (type: decimal(27,9)), _col0 (type: bigint) + sort order: +++ + Map-reduce partition columns: _col2 (type: tinyint), _col4 (type: decimal(27,9)), _col0 (type: bigint) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: smallint), _col3 (type: double) TableScan alias: t2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((tinyint_col_21 = -92) and tinyint_col_18 is not null and decimal2709_col_9 is not null) (type: boolean) + predicate: (tinyint_col_18 is not null and tinyint_col_21 is not null and decimal2709_col_9 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: decimal2709_col_9 
(type: decimal(27,9)), tinyint_col_18 (type: tinyint) - outputColumnNames: _col0, _col1 + expressions: decimal2709_col_9 (type: decimal(27,9)), tinyint_col_18 (type: tinyint), tinyint_col_21 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint) - sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint) + key expressions: _col2 (type: tinyint), _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint) + sort order: +++ + Map-reduce partition columns: _col2 (type: tinyint), _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: decimal(27,9)), _col0 (type: bigint) - 1 _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint) - outputColumnNames: _col1, _col3 + 0 _col2 (type: tinyint), _col4 (type: decimal(27,9)), _col0 (type: bigint) + 1 _col2 (type: tinyint), _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint) + outputColumnNames: _col1, _col3, _col7 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -119,22 +118,26 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - sort order: + key expressions: UDFToInteger(_col7) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col7) (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: smallint), _col3 (type: double) + value expressions: _col1 (type: smallint), _col3 (type: double), _col7 (type: tinyint) TableScan Reduce Output Operator - sort order: + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col1, _col3 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + 0 UDFToInteger(_col7) (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -147,27 +150,27 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: (UDFToShort(UDFToByte(-92)) + _col1) (type: smallint), floor(_col3) (type: bigint) + key expressions: (UDFToShort(_col7) + _col1) (type: smallint), floor(_col3) (type: bigint) sort order: +- - Map-reduce partition columns: (UDFToShort(UDFToByte(-92)) + _col1) (type: smallint) - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: smallint), _col3 (type: double) + Map-reduce partition columns: (UDFToShort(_col7) + _col1) (type: smallint) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: smallint), _col3 (type: double), _col7 (type: tinyint) Reduce Operator Tree: Select Operator - expressions: VALUE._col1 (type: smallint), VALUE._col3 (type: double) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col1 (type: 
smallint), VALUE._col3 (type: double), VALUE._col7 (type: tinyint) + outputColumnNames: _col1, _col3, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col1: smallint, _col3: double + output shape: _col1: smallint, _col3: double, _col7: tinyint type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: (UDFToShort(UDFToByte(-92)) + _col1) ASC NULLS FIRST, floor(_col3) DESC NULLS LAST - partition by: (UDFToShort(UDFToByte(-92)) + _col1) + order by: (UDFToShort(_col7) + _col1) ASC NULLS FIRST, floor(_col3) DESC NULLS LAST + partition by: (UDFToShort(_col7) + _col1) raw input shape: window functions: window function definition @@ -177,14 +180,14 @@ STAGE PLANS: window function: GenericUDAFLeadEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: COALESCE(498,LEAD_window_0,524) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -258,14 +261,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/semijoin5.q.out ql/src/test/results/clientpositive/semijoin5.q.out index 07b7470..20d372a 100644 --- ql/src/test/results/clientpositive/semijoin5.q.out +++ ql/src/test/results/clientpositive/semijoin5.q.out @@ -122,9 +122,9 @@ STAGE PLANS: value expressions: _col3 (type: double), _col5 (type: smallint), _col7 (type: int) TableScan Reduce Output Operator - key expressions: _col1 (type: timestamp), -92 (type: int) + key expressions: _col1 (type: timestamp), _col0 (type: int) sort order: ++ - Map-reduce partition columns: _col1 (type: timestamp), -92 (type: int) + Map-reduce partition columns: _col1 (type: timestamp), _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Join Operator @@ -132,7 +132,7 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col2 (type: timestamp), UDFToInteger(_col9) (type: int) - 1 _col1 (type: timestamp), -92 (type: int) + 1 _col1 (type: timestamp), _col0 (type: int) outputColumnNames: _col3, _col5, _col7 Statistics: 
Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -388,36 +388,40 @@ STAGE PLANS: 1 _col0 (type: decimal(26,12)) outputColumnNames: _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col2 (type: timestamp) - mode: hash - outputColumnNames: _col0 + Select Operator + expressions: _col2 (type: timestamp) + outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col1 (type: timestamp), -92 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-8 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: timestamp) - sort order: + - Map-reduce partition columns: _col0 (type: timestamp) + key expressions: _col0 (type: timestamp), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: timestamp) + keys: KEY._col0 (type: timestamp), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: timestamp) - outputColumnNames: _col1 + expressions: _col1 (type: int), _col0 (type: timestamp) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out index 3c9bb4a..b8daea9 100644 --- ql/src/test/results/clientpositive/show_functions.q.out +++ ql/src/test/results/clientpositive/show_functions.q.out @@ -38,7 +38,6 @@ between bin bloom_filter bround -cardinality_violation case cbrt ceil @@ -264,7 +263,6 @@ PREHOOK: query: SHOW FUNCTIONS '^c.*' PREHOOK: type: SHOWFUNCTIONS POSTHOOK: query: SHOW FUNCTIONS '^c.*' POSTHOOK: type: SHOWFUNCTIONS -cardinality_violation case cbrt ceil diff --git ql/src/test/results/clientpositive/show_tblproperties.q.out ql/src/test/results/clientpositive/show_tblproperties.q.out index 005de89..9377beb 100644 --- ql/src/test/results/clientpositive/show_tblproperties.q.out +++ ql/src/test/results/clientpositive/show_tblproperties.q.out @@ -36,7 +36,6 @@ PREHOOK: query: show tblproperties tmpfoo PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: show tblproperties tmpfoo POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bar bar value #### A masked pattern was here #### numFiles 0 @@ -54,7 +53,6 @@ PREHOOK: query: show tblproperties default.tmpfoo PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: show tblproperties default.tmpfoo POSTHOOK: type: SHOW_TBLPROPERTIES 
-COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bar bar value #### A masked pattern was here #### numFiles 0 @@ -108,7 +106,6 @@ PREHOOK: query: show tblproperties default.tmpfoo PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: show tblproperties default.tmpfoo POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bar bar value #### A masked pattern was here #### numFiles 0 @@ -126,7 +123,6 @@ PREHOOK: query: show tblproperties tmpfoo PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: show tblproperties tmpfoo POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bar bar value1 #### A masked pattern was here #### numFiles 0 @@ -150,7 +146,6 @@ PREHOOK: query: show tblproperties db1.tmpfoo PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: show tblproperties db1.tmpfoo POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bar bar value1 #### A masked pattern was here #### numFiles 0 diff --git ql/src/test/results/clientpositive/spark/subquery_in.q.out ql/src/test/results/clientpositive/spark/subquery_in.q.out index 6cc7fa7..37a6617 100644 --- ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -25,19 +25,16 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan @@ -65,10 +62,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -334,19 +331,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_size is not null (type: boolean) + Select Operator + expressions: p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name 
(type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col1) (type: double) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col1) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col1) (type: double) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: int) + value expressions: _col0 (type: string), _col1 (type: int) Map 3 Map Operator Tree: TableScan @@ -425,19 +419,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Reducer 6 Reduce Operator Tree: Group By Operator @@ -479,21 +470,23 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### almond antique medium spring khaki 6 almond antique salmon chartreuse burlywood 6 -PREHOOK: query: explain -select p_mfgr, p_name, p_size -from part b where b.p_size in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) +PREHOOK: query: explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) PREHOOK: type: QUERY -POSTHOOK: query: explain -select p_mfgr, p_name, p_size -from part b where b.p_size in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) +POSTHOOK: query: explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -503,372 +496,129 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Reducer 5 (GROUP, 2) - Reducer 7 <- Reducer 6 (GROUP, 2) - Reducer 9 <- Map 8 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Map 8 + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 6 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_mfgr (type: string) - outputColumnNames: p_mfgr - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: p_mfgr (type: string) + keys: value (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col2 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: 
string), _col0 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col2, _col5 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col5 - name: rank - window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Reducer 5 - Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: int) - outputColumnNames: _col2, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col1) - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Reducer 6 - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) 
- mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: int) + keys: _col0 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 5 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - Reducer 9 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Reducer 7 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select p_mfgr, p_name, p_size -from part b where b.p_size in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size -from part b where b.p_size in - (select min(p_size) - from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr - ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### 
-Manufacturer#1 almond antique burnished rose metallic 2 -Manufacturer#1 almond antique burnished rose metallic 2 -Manufacturer#2 almond aquamarine midnight light salmon 2 -Manufacturer#3 almond antique misty red olive 1 -Manufacturer#4 almond aquamarine yellow dodger mint 7 -Manufacturer#5 almond antique sky peru orange 2 -PREHOOK: query: explain -select * -from src b -where b.key in - (select distinct a.key - from src a - where b.value = a.value and a.key > '9' - ) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select * -from src b -where b.key in - (select distinct a.key - from src a - where b.value = a.value and a.key > '9' - ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 6 (GROUP, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE - Reducer 7 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -977,7 +727,7 @@ STAGE PLANS: alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) @@ -995,7 +745,7 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) @@ -1580,19 +1330,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_size is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: 
string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (_col5 - 1) (type: int) - sort order: + - Map-reduce partition columns: (_col5 - 1) (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan @@ -1643,19 +1390,16 @@ STAGE PLANS: expressions: _col1 (type: int) outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reducer 5 Reduce Operator Tree: Group By Operator @@ -1715,19 +1459,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_partkey is not null and p_size is not null) (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: (_col0 * _col5) (type: int) + sort order: + + Map-reduce partition columns: (_col0 * 
_col5) (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (_col0 * _col5) (type: int) - sort order: + - Map-reduce partition columns: (_col0 * _col5) (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan @@ -1778,19 +1519,16 @@ STAGE PLANS: expressions: _col1 (type: int) outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reducer 5 Reduce Operator Tree: Group By Operator @@ -2008,19 +1746,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_retailprice is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: floor(_col7) (type: bigint) + sort order: + + Map-reduce partition columns: floor(_col7) (type: bigint) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: floor(_col7) (type: bigint) - sort order: + - Map-reduce partition columns: floor(_col7) (type: bigint) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 
(type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan @@ -2068,26 +1803,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: double) - outputColumnNames: _col1 + expressions: floor(_col1) (type: bigint) + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: floor(_col1) is not null (type: boolean) + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: floor(_col1) (type: bigint) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reducer 5 Reduce Operator Tree: Group By Operator @@ -2829,26 +2557,24 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) Map 5 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: value (type: string) @@ -2912,15 +2638,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Map-reduce partition columns: _col1 (type: bigint) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column 
stats: NONE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -2962,19 +2685,16 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col1 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reducer 9 Reduce Operator Tree: Group By Operator @@ -3339,19 +3059,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_size is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (_col5 - 1) (type: int) - sort order: + - Map-reduce partition columns: (_col5 - 1) (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 4 Map Operator Tree: TableScan @@ -3413,19 +3130,16 @@ STAGE PLANS: expressions: _col1 (type: int) outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + 
sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reducer 6 Reduce Operator Tree: Group By Operator @@ -3486,19 +3200,16 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_size is not null (type: boolean) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: (_col5 - 1) (type: int) + sort order: + + Map-reduce partition columns: (_col5 - 1) (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (_col5 - 1) (type: int) - sort order: + - Map-reduce partition columns: (_col5 - 1) (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 4 Map Operator Tree: TableScan @@ -3564,19 +3275,16 @@ STAGE PLANS: expressions: _col1 (type: int) outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reducer 6 Reduce Operator Tree: Group By Operator @@ -3629,63 +3337,54 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ value expressions: _col1 (type: string)
 Map 3
 Map Operator Tree:
 TableScan
 alias: part
 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_name is not null (type: boolean)
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_name (type: string)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE
 Map 5
 Map Operator Tree:
 TableScan
 alias: part
 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_brand is not null (type: boolean)
+ Select Operator
+ expressions: p_brand (type: string)
+ outputColumnNames: _col0
 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_brand (type: string)
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE
 Reducer 2
 Reduce Operator Tree:
 Join Operator
@@ -4803,9 +4502,39 @@ POSTHOOK: Input: default@part
 85768
 86428
 90681
-PREHOOK: query: explain select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type)
+PREHOOK: query: create table t(i int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t(i int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values(1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values(1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t values(0)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values(0)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: create table tempty(i int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tempty
+POSTHOOK: query: create table tempty(i int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tempty
+PREHOOK: query: explain select * from t where i IN (select count(*) from tempty)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type)
+POSTHOOK: query: explain select * from t where i IN (select count(*) from tempty)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
@@ -4815,1489 +4544,43 @@ STAGE PLANS:
 Stage: Stage-1
 Spark
 Edges:
- Reducer 10 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 14 (PARTITION-LEVEL SORT, 2)
- Reducer 11 <- Reducer 10 (GROUP, 2)
- Reducer 12 <- Reducer 11 (GROUP, 2)
- Reducer 14 <- Map 13 (GROUP, 2)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
- Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
- Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 14 (PARTITION-LEVEL SORT, 2)
- Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (GROUP, 1)
+ Reducer 5 <- Reducer 4 (GROUP, 2)
 #### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: part
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ alias: t
+ Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ expressions: i (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
- key expressions: _col4 (type: string)
+ key expressions: UDFToLong(_col0) (type: bigint)
 sort order: +
- Map-reduce partition columns: _col4 (type: string)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
- Map 13
+ Map-reduce partition columns: UDFToLong(_col0) (type: bigint)
+ Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 3
 Map Operator Tree:
 TableScan
- alias: part
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ alias: tempty
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
 Select Operator
- expressions: p_type (type: string)
- outputColumnNames: p_type
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
 Group By Operator
- keys: p_type (type: string)
+ aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: pp
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_type (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reducer 10
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1
- Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 11
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: bigint)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: bigint)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
- Reducer 12
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint), _col0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: bigint)
- sort order: ++
- Map-reduce partition columns: _col1 (type: string),
_col0 (type: bigint) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reducer 14 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col4 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col10, true) > 0) (type: boolean) - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: string), UDFToLong(_col5) (type: bigint) - sort order: ++ - Map-reduce partition columns: _col4 (type: string), UDFToLong(_col5) (type: bigint) - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: string), UDFToLong(_col5) (type: bigint) - 1 _col1 (type: string), _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 9 Data size: 1223 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 1223 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: 
complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -PREHOOK: query: explain select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) - Reducer 6 <- Reducer 5 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), UDFToDouble(_col5) (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), UDFToDouble(_col5) (type: double) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Map 7 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int) - outputColumnNames: p_partkey - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), UDFToDouble(_col5) (type: double) - 1 _col1 (type: int), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: int) - outputColumnNames: _col2, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(_col1) - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: double) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key 
expressions: _col1 (type: int), _col0 (type: double) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: double) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ -110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously -112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even -144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about -146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve -191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle -192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir -195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product -PREHOOK: query: explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 4 (GROUP, 2) - Reducer 6 <- Reducer 5 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col5 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 3 - 
Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) - Map 7 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int) - outputColumnNames: p_partkey - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col5 (type: int) - 1 _col1 (type: int), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 30 Data size: 7485 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 7485 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 > _col2) (type: boolean) - Statistics: Num rows: 112 Data size: 27220 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: int) - outputColumnNames: _col2, _col1 - Statistics: Num rows: 112 Data size: 27220 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col1) - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 112 Data size: 27220 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 112 Data size: 27220 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 13610 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 13610 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 
56 Data size: 13610 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 6805 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 6805 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: int) - Statistics: Num rows: 28 Data size: 6805 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product -PREHOOK: query: select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref -PREHOOK: query: explain select * from part where p_size NOT IN (select count(*) from part pp where pp.p_type = part.p_type) -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from part where p_size NOT IN (select count(*) from part pp where pp.p_type = part.p_type) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 14 (PARTITION-LEVEL SORT, 2) - Reducer 12 <- Reducer 11 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 14 <- Map 13 (GROUP, 2) - Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 2), Reducer 21 (PARTITION-LEVEL SORT, 2) - Reducer 17 <- Reducer 16 (GROUP, 2) - Reducer 18 <- Reducer 17 (GROUP, 2) - Reducer 19 <- Reducer 18 (PARTITION-LEVEL SORT, 2), Reducer 23 (PARTITION-LEVEL SORT, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 21 <- Map 13 (GROUP, 2) - Reducer 23 <- Map 22 (GROUP, 2) - Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 19 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 14 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Reducer 6 (GROUP PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: 
part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: string) - sort order: + - Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 10 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_type (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 13 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_type (type: string) - outputColumnNames: p_type - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_type (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 15 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_type (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 22 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_size (type: int) - outputColumnNames: p_size - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_size (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 28 Data 
size: 3461 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 12 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 14 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 16 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 17 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reducer 18 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), _col1 (type: string), true (type: boolean) - outputColumnNames: 
_col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: boolean) - Reducer 19 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 UDFToLong(_col0) (type: bigint) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col3 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: boolean) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col4 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col10, true) > 0) (type: boolean) - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: string) - sort order: + - Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 21 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 23 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToLong(_col0) (type: bigint) - sort order: + - Map-reduce partition columns: UDFToLong(_col0) (type: bigint) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col4 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13 - Statistics: Num rows: 9 Data size: 1223 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - 
key expressions: _col5 (type: int), _col4 (type: string) - sort order: ++ - Map-reduce partition columns: _col5 (type: int), _col4 (type: string) - Statistics: Num rows: 9 Data size: 1223 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col5 (type: int), _col4 (type: string) - 1 _col3 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: CASE WHEN ((_col12 = 0)) THEN (true) WHEN (_col12 is null) THEN (true) WHEN (_col16 is not null) THEN (false) WHEN (_col5 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (false) ELSE (true) END (type: boolean) - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reducer 7 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from part where p_size NOT IN (select count(*) from part pp where pp.p_type = part.p_type) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select * from part where p_size NOT IN 
(select count(*) from part pp where pp.p_type = part.p_type) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ -110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously -112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car -132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even -144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about -146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra -17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve -191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle -192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir -195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -PREHOOK: query: explain select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 14 (PARTITION-LEVEL SORT, 2) - Reducer 12 <- Reducer 11 
(GROUP PARTITION-LEVEL SORT, 2) - Reducer 14 <- Map 13 (GROUP, 2) - Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 2), Reducer 21 (PARTITION-LEVEL SORT, 2) - Reducer 17 <- Reducer 16 (GROUP, 2) - Reducer 18 <- Reducer 17 (GROUP, 2) - Reducer 19 <- Reducer 18 (PARTITION-LEVEL SORT, 2), Reducer 23 (PARTITION-LEVEL SORT, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 21 <- Map 13 (GROUP, 2) - Reducer 23 <- Map 22 (GROUP, 2) - Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 19 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 14 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Reducer 6 (GROUP PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 10 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Map 13 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int) - outputColumnNames: p_partkey - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 15 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: 
Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Map 22 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_size (type: int) - outputColumnNames: p_size - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_size (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: pp - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: int) - outputColumnNames: _col2, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(_col1) - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) - Reducer 12 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col1) - keys: _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 14 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 16 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 
outputColumnNames: _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: int) - outputColumnNames: _col2, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(_col1) - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) - Reducer 17 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: double), _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reducer 18 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: double), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: boolean) - Reducer 19 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col3 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: boolean) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col10, true) > 0) (type: boolean) - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, 
_col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 21 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 23 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13 - Statistics: Num rows: 9 Data size: 1223 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col5 (type: int) - Statistics: Num rows: 9 Data size: 1223 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: int), _col5 (type: int) - 1 _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: CASE WHEN ((_col12 = 0)) THEN (true) WHEN (_col12 is null) THEN (true) WHEN (_col16 is not null) THEN (false) WHEN (_col5 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (false) ELSE (true) END (type: boolean) - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reducer 7 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -PREHOOK: query: create table t(i int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t -POSTHOOK: query: create table t(i int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t -PREHOOK: query: insert into t values(1) -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values(1) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: insert into t values(0) -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values(0) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: create table tempty(i int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tempty -POSTHOOK: query: create table tempty(i int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tempty -PREHOOK: query: explain select * from t where i IN (select count(*) from tempty) -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t where i IN (select count(*) from tempty) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 
(PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 5 <- Reducer 4 (GROUP, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i is not null (type: boolean) - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToLong(_col0) (type: bigint) - sort order: + - Map-reduce partition columns: UDFToLong(_col0) (type: bigint) - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Map 3 - Map Operator Tree: - TableScan - alias: tempty - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 2 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -6321,19 +4604,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 5 Reduce Operator Tree: Group By Operator @@ -6433,221 +4713,3 @@ POSTHOOK: query: drop table tempty POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tempty POSTHOOK: Output: default@tempty -PREHOOK: query: create table t(i int, j int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t -POSTHOOK: query: create table t(i int, j int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t -PREHOOK: query: insert into t values(0,1), (0,2) -PREHOOK: type: QUERY -PREHOOK: Output: default@t -POSTHOOK: query: insert into t values(0,1), (0,2) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@t -POSTHOOK: Lineage: t.i EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: t.j EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: create table tt(i 
int, j int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tt -POSTHOOK: query: create table tt(i int, j int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tt -PREHOOK: query: insert into tt values(0,3) -PREHOOK: type: QUERY -PREHOOK: Output: default@tt -POSTHOOK: query: insert into tt values(0,3) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@tt -POSTHOOK: Lineage: tt.i EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: tt.j EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain select * from t where i IN (select sum(i) from tt where tt.j = t.j) -PREHOOK: type: QUERY -POSTHOOK: query: explain select * from t where i IN (select sum(i) from tt where tt.j = t.j) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) - Reducer 6 <- Reducer 5 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToLong(_col0) (type: bigint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: UDFToLong(_col0) (type: bigint), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Map 3 - Map Operator Tree: - TableScan - alias: tt - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Map 7 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: j (type: int) - outputColumnNames: j - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: j (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToLong(_col0) (type: bigint), _col1 (type: int) - 1 _col0 (type: bigint), _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - 
File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col0 (type: int) - outputColumnNames: _col2, _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint), _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from t where i IN (select sum(i) from tt where tt.j = t.j) -PREHOOK: type: QUERY -PREHOOK: Input: default@t -PREHOOK: Input: default@tt -#### A masked pattern was here #### -POSTHOOK: query: select * from t where i IN (select sum(i) from tt where tt.j = t.j) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t -POSTHOOK: Input: default@tt -#### A masked pattern was here #### -PREHOOK: query: drop table t -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@t -PREHOOK: Output: default@t -POSTHOOK: query: drop table t -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@t -POSTHOOK: Output: default@t 
-PREHOOK: query: drop table tt -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@tt -PREHOOK: Output: default@tt -POSTHOOK: query: drop table tt -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@tt -POSTHOOK: Output: default@tt diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index c84363f..ea2c9a7 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -28,7 +28,7 @@ STAGE PLANS: alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) @@ -46,7 +46,7 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) diff --git ql/src/test/results/clientpositive/stats_invalidation.q.out ql/src/test/results/clientpositive/stats_invalidation.q.out index a0e7663..d822f4f 100644 --- ql/src/test/results/clientpositive/stats_invalidation.q.out +++ ql/src/test/results/clientpositive/stats_invalidation.q.out @@ -88,7 +88,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} #### A masked pattern was here #### numFiles 1 numRows 500 diff --git ql/src/test/results/clientpositive/stats_missing_warning.q.out ql/src/test/results/clientpositive/stats_missing_warning.q.out deleted file mode 100644 index 0ed70a0..0000000 --- ql/src/test/results/clientpositive/stats_missing_warning.q.out +++ /dev/null @@ -1,159 +0,0 @@ -PREHOOK: query: DROP TABLE IF EXISTS missing_stats_t1 -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS missing_stats_t1 -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE IF EXISTS missing_stats_t2 -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS missing_stats_t2 -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE IF EXISTS missing_stats_t3 -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS missing_stats_t3 -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE missing_stats_t1 (key STRING, value STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@missing_stats_t1 -POSTHOOK: query: CREATE TABLE missing_stats_t1 (key STRING, value STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@missing_stats_t1 -PREHOOK: query: CREATE TABLE missing_stats_t2 (key STRING, value STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@missing_stats_t2 -POSTHOOK: query: CREATE TABLE missing_stats_t2 (key STRING, value STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@missing_stats_t2 -PREHOOK: query: 
CREATE TABLE missing_stats_t3 (key STRING, value STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@missing_stats_t3 -POSTHOOK: query: CREATE TABLE missing_stats_t3 (key STRING, value STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@missing_stats_t3 -PREHOOK: query: INSERT INTO missing_stats_t1 (key, value) - SELECT key, value - FROM src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@missing_stats_t1 -POSTHOOK: query: INSERT INTO missing_stats_t1 (key, value) - SELECT key, value - FROM src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@missing_stats_t1 -POSTHOOK: Lineage: missing_stats_t1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: missing_stats_t1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT INTO missing_stats_t2 (key, value) - SELECT key, value - FROM src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@missing_stats_t2 -POSTHOOK: query: INSERT INTO missing_stats_t2 (key, value) - SELECT key, value - FROM src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@missing_stats_t2 -POSTHOOK: Lineage: missing_stats_t2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: missing_stats_t2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT INTO missing_stats_t3 (key, value) - SELECT key, value - FROM src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@missing_stats_t3 -POSTHOOK: query: INSERT INTO missing_stats_t3 (key, value) - SELECT key, value - FROM src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@missing_stats_t3 -POSTHOOK: Lineage: missing_stats_t3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: missing_stats_t3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT COUNT(*) -FROM missing_stats_t1 t1 -JOIN missing_stats_t2 t2 ON t1.value = t2.key -JOIN missing_stats_t3 t3 ON t2.key = t3.value -PREHOOK: type: QUERY -PREHOOK: Input: default@missing_stats_t1 -PREHOOK: Input: default@missing_stats_t2 -PREHOOK: Input: default@missing_stats_t3 -#### A masked pattern was here #### -POSTHOOK: query: SELECT COUNT(*) -FROM missing_stats_t1 t1 -JOIN missing_stats_t2 t2 ON t1.value = t2.key -JOIN missing_stats_t3 t3 ON t2.key = t3.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@missing_stats_t1 -POSTHOOK: Input: default@missing_stats_t2 -POSTHOOK: Input: default@missing_stats_t3 -#### A masked pattern was here #### -0 -PREHOOK: query: SELECT COUNT(*) -FROM missing_stats_t1 t1 -JOIN missing_stats_t2 t2 ON t1.value = t2.key -JOIN missing_stats_t3 t3 ON t2.key = t3.value -PREHOOK: type: QUERY -PREHOOK: Input: default@missing_stats_t1 -PREHOOK: Input: default@missing_stats_t2 -PREHOOK: Input: default@missing_stats_t3 -#### A masked pattern was here #### -POSTHOOK: query: SELECT COUNT(*) -FROM missing_stats_t1 t1 -JOIN missing_stats_t2 t2 ON t1.value = t2.key -JOIN missing_stats_t3 t3 ON t2.key = t3.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@missing_stats_t1 -POSTHOOK: Input: default@missing_stats_t2 -POSTHOOK: Input: default@missing_stats_t3 -#### A masked pattern was here #### -0 -PREHOOK: query: ANALYZE TABLE 
missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS -PREHOOK: type: QUERY -PREHOOK: Input: default@missing_stats_t1 -#### A masked pattern was here #### -POSTHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS -POSTHOOK: type: QUERY -POSTHOOK: Input: default@missing_stats_t1 -#### A masked pattern was here #### -PREHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS -PREHOOK: type: QUERY -PREHOOK: Input: default@missing_stats_t2 -#### A masked pattern was here #### -POSTHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS -POSTHOOK: type: QUERY -POSTHOOK: Input: default@missing_stats_t2 -#### A masked pattern was here #### -PREHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS -PREHOOK: type: QUERY -PREHOOK: Input: default@missing_stats_t3 -#### A masked pattern was here #### -POSTHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS -POSTHOOK: type: QUERY -POSTHOOK: Input: default@missing_stats_t3 -#### A masked pattern was here #### -PREHOOK: query: SELECT COUNT(*) -FROM missing_stats_t1 t1 -JOIN missing_stats_t2 t2 ON t1.value = t2.key -JOIN missing_stats_t3 t3 ON t2.key = t3.value -PREHOOK: type: QUERY -PREHOOK: Input: default@missing_stats_t1 -PREHOOK: Input: default@missing_stats_t2 -PREHOOK: Input: default@missing_stats_t3 -#### A masked pattern was here #### -POSTHOOK: query: SELECT COUNT(*) -FROM missing_stats_t1 t1 -JOIN missing_stats_t2 t2 ON t1.value = t2.key -JOIN missing_stats_t3 t3 ON t2.key = t3.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@missing_stats_t1 -POSTHOOK: Input: default@missing_stats_t2 -POSTHOOK: Input: default@missing_stats_t3 -#### A masked pattern was here #### -0 diff --git ql/src/test/results/clientpositive/subq_where_serialization.q.out ql/src/test/results/clientpositive/subq_where_serialization.q.out index 7feb0c7..5deb9d9 100644 --- ql/src/test/results/clientpositive/subq_where_serialization.q.out +++ ql/src/test/results/clientpositive/subq_where_serialization.q.out @@ -15,8 +15,9 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: string) @@ -57,17 +58,14 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/subquery_in_having.q.out ql/src/test/results/clientpositive/subquery_in_having.q.out index 8cd1208..d18d4e2 100644 --- ql/src/test/results/clientpositive/subquery_in_having.q.out +++ ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -87,15 +87,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -162,20 +159,17 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce @@ -299,8 +293,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(p_size) @@ -367,8 +362,9 @@ STAGE PLANS: TableScan alias: part_subq Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(p_size), min(p_size) @@ -447,8 +443,9 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_size Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(p_size) @@ -465,8 +462,9 @@ STAGE PLANS: TableScan alias: part_subq Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) + Select Operator + expressions: p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_size 
Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(p_size), min(p_size) @@ -602,19 +600,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '8') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -633,9 +628,9 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator - Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 498 Data size: 5289 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 @@ -660,9 +655,9 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 498 Data size: 5289 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 @@ -701,15 +696,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -802,20 +794,17 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - 
predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -881,19 +870,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '8') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -912,9 +898,9 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator - Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 498 Data size: 5289 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 @@ -939,9 +925,9 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 498 Data size: 5289 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 @@ -980,15 +966,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 1 Data 
size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1081,20 +1064,17 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1746,8 +1726,9 @@ STAGE PLANS: TableScan alias: part_subq Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: p_name, p_mfgr, p_size Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(p_size) @@ -1875,24 +1856,21 @@ STAGE PLANS: window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(MAX)~CURRENT Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: first_value_window_0 is not null (type: boolean) + Select Operator + expressions: first_value_window_0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: first_value_window_0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/subquery_notexists.q.out ql/src/test/results/clientpositive/subquery_notexists.q.out index b916d2f..b8df48b 100644 --- ql/src/test/results/clientpositive/subquery_notexists.q.out +++ ql/src/test/results/clientpositive/subquery_notexists.q.out @@ -866,931 +866,3 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### 98 val_98 98 val_98 -Warning: Shuffle Join JOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -Warning: Shuffle Join JOIN[12][tables = [$hdt$_3, $hdt$_4]] in Stage 'Stage-6:MAPRED' is a cross product -PREHOOK: query: explain SELECT p1.p_name FROM part p1 LEFT JOIN (select p_type as p_col from part ) p2 WHERE NOT EXISTS - (select pp1.p_type as p_col from part pp1 where pp1.p_partkey = p2.p_col) -PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT p1.p_name FROM part p1 LEFT JOIN (select p_type as p_col from part ) p2 WHERE NOT EXISTS - (select pp1.p_type as p_col from part pp1 where pp1.p_partkey = p2.p_col) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-6 is a root stage - Stage-7 depends on stages: Stage-6 - Stage-4 depends on stages: Stage-7 - Stage-5 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_type (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 676 Data size: 164320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 676 Data size: 164320 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 185 Data size: 23367 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 
(type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 743 Data size: 180752 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 371 Data size: 90254 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 371 Data size: 90254 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 371 Data size: 90254 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_type (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 338 Data size: 42601 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: pp1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 26 Data size: 3147 
Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 338 Data size: 42601 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col1 - Statistics: Num rows: 371 Data size: 46861 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 371 Data size: 46861 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 371 Data size: 46861 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 185 Data size: 23367 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 185 Data size: 23367 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -Warning: Shuffle Join JOIN[12][tables = [$hdt$_3, $hdt$_4]] in Stage 'Stage-6:MAPRED' is a cross product -PREHOOK: query: SELECT p1.p_name FROM part p1 LEFT JOIN (select p_type as p_col from part ) p2 WHERE NOT EXISTS - (select pp1.p_type as p_col from part pp1 where pp1.p_partkey = p2.p_col) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: SELECT p1.p_name FROM part p1 LEFT JOIN (select p_type as p_col from part ) p2 WHERE NOT EXISTS - (select pp1.p_type as p_col from part pp1 where pp1.p_partkey = p2.p_col) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -almond antique metallic orange dim -almond aquamarine midnight light salmon -almond antique olive coral navajo -almond antique blue firebrick mint -almond antique chartreuse lavender yellow -almond aquamarine sandy cyan gainsboro -almond azure aquamarine papaya violet -almond antique forest lavender goldenrod -almond aquamarine floral ivory bisque -almond aquamarine pink moccasin thistle -almond aquamarine dodger light gainsboro -almond antique salmon chartreuse burlywood -almond antique violet turquoise frosted -almond antique burnished rose metallic -almond aquamarine yellow dodger mint -almond antique violet chocolate turquoise -almond antique sky peru orange -almond aquamarine rose maroon 
antique -almond antique burnished rose metallic -almond antique chartreuse khaki white -almond antique violet mint lemon -almond azure blanched chiffon midnight -almond antique gainsboro frosted violet -almond antique misty red olive -almond aquamarine burnished black steel -almond antique medium spring khaki -almond antique salmon chartreuse burlywood -almond antique blue firebrick mint -almond aquamarine pink moccasin thistle -almond aquamarine floral ivory bisque -almond antique gainsboro frosted violet -almond aquamarine yellow dodger mint -almond aquamarine dodger light gainsboro -almond aquamarine burnished black steel -almond azure blanched chiffon midnight -almond antique violet turquoise frosted -almond antique sky peru orange -almond antique burnished rose metallic -almond antique violet chocolate turquoise -almond antique medium spring khaki -almond antique chartreuse khaki white -almond antique burnished rose metallic -almond aquamarine rose maroon antique -almond aquamarine midnight light salmon -almond antique chartreuse lavender yellow -almond antique olive coral navajo -almond antique misty red olive -almond aquamarine sandy cyan gainsboro -almond antique violet mint lemon -almond azure aquamarine papaya violet -almond antique forest lavender goldenrod -almond antique metallic orange dim -almond antique salmon chartreuse burlywood -almond antique blue firebrick mint -almond antique violet chocolate turquoise -almond antique burnished rose metallic -almond antique forest lavender goldenrod -almond aquamarine midnight light salmon -almond antique misty red olive -almond antique gainsboro frosted violet -almond antique olive coral navajo -almond antique metallic orange dim -almond antique chartreuse khaki white -almond aquamarine yellow dodger mint -almond antique burnished rose metallic -almond aquamarine rose maroon antique -almond aquamarine dodger light gainsboro -almond antique chartreuse lavender yellow -almond aquamarine pink moccasin thistle -almond aquamarine burnished black steel -almond azure aquamarine papaya violet -almond antique violet mint lemon -almond azure blanched chiffon midnight -almond antique violet turquoise frosted -almond antique sky peru orange -almond antique medium spring khaki -almond aquamarine floral ivory bisque -almond aquamarine sandy cyan gainsboro -almond antique violet mint lemon -almond antique chartreuse lavender yellow -almond aquamarine burnished black steel -almond antique blue firebrick mint -almond antique violet chocolate turquoise -almond antique burnished rose metallic -almond aquamarine rose maroon antique -almond aquamarine yellow dodger mint -almond aquamarine sandy cyan gainsboro -almond aquamarine floral ivory bisque -almond azure aquamarine papaya violet -almond antique metallic orange dim -almond antique sky peru orange -almond antique burnished rose metallic -almond antique misty red olive -almond antique olive coral navajo -almond aquamarine dodger light gainsboro -almond aquamarine midnight light salmon -almond aquamarine pink moccasin thistle -almond antique violet turquoise frosted -almond antique forest lavender goldenrod -almond antique chartreuse khaki white -almond antique medium spring khaki -almond antique gainsboro frosted violet -almond azure blanched chiffon midnight -almond antique salmon chartreuse burlywood -almond azure aquamarine papaya violet -almond antique salmon chartreuse burlywood -almond aquamarine rose maroon antique -almond antique forest lavender goldenrod -almond antique metallic orange dim 
-almond antique blue firebrick mint -almond aquamarine pink moccasin thistle -almond aquamarine yellow dodger mint -almond aquamarine dodger light gainsboro -almond antique violet turquoise frosted -almond antique sky peru orange -almond antique violet chocolate turquoise -almond antique chartreuse khaki white -almond antique burnished rose metallic -almond antique gainsboro frosted violet -almond antique burnished rose metallic -almond antique violet mint lemon -almond aquamarine burnished black steel -almond antique medium spring khaki -almond antique misty red olive -almond aquamarine midnight light salmon -almond aquamarine sandy cyan gainsboro -almond antique olive coral navajo -almond antique chartreuse lavender yellow -almond azure blanched chiffon midnight -almond aquamarine floral ivory bisque -almond azure blanched chiffon midnight -almond antique blue firebrick mint -almond antique chartreuse lavender yellow -almond azure aquamarine papaya violet -almond antique chartreuse khaki white -almond aquamarine midnight light salmon -almond antique sky peru orange -almond antique misty red olive -almond aquamarine yellow dodger mint -almond antique violet mint lemon -almond aquamarine pink moccasin thistle -almond aquamarine sandy cyan gainsboro -almond antique burnished rose metallic -almond antique salmon chartreuse burlywood -almond antique forest lavender goldenrod -almond antique metallic orange dim -almond antique violet turquoise frosted -almond antique violet chocolate turquoise -almond aquamarine dodger light gainsboro -almond antique gainsboro frosted violet -almond antique burnished rose metallic -almond aquamarine rose maroon antique -almond antique olive coral navajo -almond antique medium spring khaki -almond aquamarine burnished black steel -almond aquamarine floral ivory bisque -almond antique metallic orange dim -almond antique violet chocolate turquoise -almond antique burnished rose metallic -almond aquamarine dodger light gainsboro -almond antique forest lavender goldenrod -almond antique burnished rose metallic -almond aquamarine yellow dodger mint -almond antique misty red olive -almond antique chartreuse khaki white -almond antique chartreuse lavender yellow -almond antique medium spring khaki -almond azure aquamarine papaya violet -almond aquamarine sandy cyan gainsboro -almond aquamarine floral ivory bisque -almond antique salmon chartreuse burlywood -almond aquamarine rose maroon antique -almond antique olive coral navajo -almond azure blanched chiffon midnight -almond aquamarine burnished black steel -almond aquamarine midnight light salmon -almond antique sky peru orange -almond antique violet mint lemon -almond aquamarine pink moccasin thistle -almond antique violet turquoise frosted -almond antique gainsboro frosted violet -almond antique blue firebrick mint -almond aquamarine midnight light salmon -almond azure blanched chiffon midnight -almond antique chartreuse lavender yellow -almond antique gainsboro frosted violet -almond aquamarine burnished black steel -almond antique medium spring khaki -almond antique blue firebrick mint -almond antique olive coral navajo -almond aquamarine dodger light gainsboro -almond antique violet chocolate turquoise -almond aquamarine rose maroon antique -almond antique violet turquoise frosted -almond antique forest lavender goldenrod -almond aquamarine floral ivory bisque -almond aquamarine pink moccasin thistle -almond antique burnished rose metallic -almond azure aquamarine papaya violet -almond aquamarine sandy cyan 
gainsboro -almond antique salmon chartreuse burlywood -almond antique violet mint lemon -almond aquamarine yellow dodger mint -almond antique metallic orange dim -almond antique sky peru orange -almond antique misty red olive -almond antique burnished rose metallic -almond antique chartreuse khaki white -almond antique sky peru orange -almond aquamarine floral ivory bisque -almond azure aquamarine papaya violet -almond antique olive coral navajo -almond azure blanched chiffon midnight -almond aquamarine sandy cyan gainsboro -almond antique chartreuse lavender yellow -almond aquamarine burnished black steel -almond antique medium spring khaki -almond aquamarine midnight light salmon -almond antique misty red olive -almond antique violet chocolate turquoise -almond antique chartreuse khaki white -almond antique violet mint lemon -almond antique burnished rose metallic -almond antique burnished rose metallic -almond aquamarine yellow dodger mint -almond aquamarine pink moccasin thistle -almond antique violet turquoise frosted -almond aquamarine dodger light gainsboro -almond antique forest lavender goldenrod -almond antique blue firebrick mint -almond antique gainsboro frosted violet -almond antique metallic orange dim -almond antique salmon chartreuse burlywood -almond aquamarine rose maroon antique -almond antique chartreuse khaki white -almond aquamarine pink moccasin thistle -almond aquamarine floral ivory bisque -almond antique sky peru orange -almond aquamarine yellow dodger mint -almond antique violet mint lemon -almond antique metallic orange dim -almond antique burnished rose metallic -almond antique chartreuse lavender yellow -almond aquamarine burnished black steel -almond antique salmon chartreuse burlywood -almond antique misty red olive -almond antique violet turquoise frosted -almond antique blue firebrick mint -almond antique medium spring khaki -almond aquamarine rose maroon antique -almond azure blanched chiffon midnight -almond antique gainsboro frosted violet -almond aquamarine sandy cyan gainsboro -almond antique violet chocolate turquoise -almond aquamarine midnight light salmon -almond antique olive coral navajo -almond antique forest lavender goldenrod -almond azure aquamarine papaya violet -almond antique burnished rose metallic -almond aquamarine dodger light gainsboro -almond antique salmon chartreuse burlywood -almond antique salmon chartreuse burlywood -almond antique chartreuse lavender yellow -almond aquamarine sandy cyan gainsboro -almond antique metallic orange dim -almond antique chartreuse khaki white -almond antique violet mint lemon -almond antique gainsboro frosted violet -almond antique sky peru orange -almond antique olive coral navajo -almond antique chartreuse khaki white -almond antique burnished rose metallic -almond aquamarine rose maroon antique -almond antique violet mint lemon -almond azure blanched chiffon midnight -almond antique sky peru orange -almond antique gainsboro frosted violet -almond antique burnished rose metallic -almond aquamarine floral ivory bisque -almond azure blanched chiffon midnight -almond aquamarine yellow dodger mint -almond aquamarine rose maroon antique -almond antique violet turquoise frosted -almond antique burnished rose metallic -almond aquamarine yellow dodger mint -almond aquamarine dodger light gainsboro -almond aquamarine pink moccasin thistle -almond antique olive coral navajo -almond antique violet turquoise frosted -almond antique blue firebrick mint -almond aquamarine dodger light gainsboro -almond azure 
aquamarine papaya violet -almond aquamarine pink moccasin thistle -almond antique forest lavender goldenrod -almond antique burnished rose metallic -almond aquamarine floral ivory bisque -almond antique forest lavender goldenrod -almond antique blue firebrick mint -almond antique metallic orange dim -almond antique violet chocolate turquoise -almond antique chartreuse lavender yellow -almond antique violet chocolate turquoise -almond aquamarine midnight light salmon -almond aquamarine burnished black steel -almond antique medium spring khaki -almond azure aquamarine papaya violet -almond aquamarine midnight light salmon -almond antique misty red olive -almond aquamarine burnished black steel -almond antique medium spring khaki -almond aquamarine sandy cyan gainsboro -almond antique misty red olive -almond antique metallic orange dim -almond aquamarine dodger light gainsboro -almond antique burnished rose metallic -almond antique misty red olive -almond antique chartreuse lavender yellow -almond azure aquamarine papaya violet -almond antique salmon chartreuse burlywood -almond antique olive coral navajo -almond aquamarine burnished black steel -almond antique sky peru orange -almond aquamarine pink moccasin thistle -almond antique gainsboro frosted violet -almond antique violet chocolate turquoise -almond antique violet turquoise frosted -almond antique violet mint lemon -almond aquamarine midnight light salmon -almond azure blanched chiffon midnight -almond aquamarine rose maroon antique -almond aquamarine floral ivory bisque -almond aquamarine sandy cyan gainsboro -almond antique medium spring khaki -almond antique chartreuse khaki white -almond aquamarine yellow dodger mint -almond antique forest lavender goldenrod -almond antique burnished rose metallic -almond antique blue firebrick mint -almond aquamarine dodger light gainsboro -almond antique metallic orange dim -almond aquamarine midnight light salmon -almond aquamarine burnished black steel -almond antique violet turquoise frosted -almond antique violet chocolate turquoise -almond antique burnished rose metallic -almond antique forest lavender goldenrod -almond aquamarine rose maroon antique -almond antique chartreuse khaki white -almond antique sky peru orange -almond azure blanched chiffon midnight -almond antique olive coral navajo -almond antique misty red olive -almond aquamarine sandy cyan gainsboro -almond antique salmon chartreuse burlywood -almond aquamarine floral ivory bisque -almond antique blue firebrick mint -almond antique burnished rose metallic -almond azure aquamarine papaya violet -almond aquamarine yellow dodger mint -almond antique violet mint lemon -almond antique medium spring khaki -almond antique chartreuse lavender yellow -almond aquamarine pink moccasin thistle -almond antique gainsboro frosted violet -almond aquamarine dodger light gainsboro -almond azure blanched chiffon midnight -almond azure blanched chiffon midnight -almond aquamarine burnished black steel -almond aquamarine burnished black steel -almond antique violet chocolate turquoise -almond antique salmon chartreuse burlywood -almond antique salmon chartreuse burlywood -almond antique blue firebrick mint -almond aquamarine floral ivory bisque -almond antique gainsboro frosted violet -almond aquamarine floral ivory bisque -almond antique gainsboro frosted violet -almond antique violet turquoise frosted -almond antique violet turquoise frosted -almond antique blue firebrick mint -almond antique forest lavender goldenrod -almond antique forest 
lavender goldenrod -almond antique burnished rose metallic -almond antique olive coral navajo -almond antique burnished rose metallic -almond antique olive coral navajo -almond aquamarine rose maroon antique -almond aquamarine yellow dodger mint -almond antique violet chocolate turquoise -almond aquamarine pink moccasin thistle -almond aquamarine yellow dodger mint -almond antique violet mint lemon -almond antique violet mint lemon -almond aquamarine pink moccasin thistle -almond aquamarine rose maroon antique -almond antique misty red olive -almond antique misty red olive -almond antique chartreuse khaki white -almond antique chartreuse khaki white -almond antique metallic orange dim -almond antique burnished rose metallic -almond antique chartreuse lavender yellow -almond antique chartreuse lavender yellow -almond aquamarine midnight light salmon -almond aquamarine midnight light salmon -almond antique sky peru orange -almond antique sky peru orange -almond antique medium spring khaki -almond antique medium spring khaki -almond antique burnished rose metallic -almond antique metallic orange dim -almond azure aquamarine papaya violet -almond azure aquamarine papaya violet -almond aquamarine dodger light gainsboro -almond aquamarine sandy cyan gainsboro -almond aquamarine sandy cyan gainsboro -almond antique chartreuse lavender yellow -almond aquamarine burnished black steel -almond antique gainsboro frosted violet -almond aquamarine sandy cyan gainsboro -almond antique burnished rose metallic -almond antique chartreuse khaki white -almond antique violet turquoise frosted -almond antique olive coral navajo -almond aquamarine floral ivory bisque -almond antique salmon chartreuse burlywood -almond aquamarine yellow dodger mint -almond aquamarine pink moccasin thistle -almond antique sky peru orange -almond antique burnished rose metallic -almond antique metallic orange dim -almond antique violet chocolate turquoise -almond aquamarine dodger light gainsboro -almond antique violet mint lemon -almond azure aquamarine papaya violet -almond aquamarine midnight light salmon -almond antique misty red olive -almond antique medium spring khaki -almond antique blue firebrick mint -almond aquamarine rose maroon antique -almond antique forest lavender goldenrod -almond azure blanched chiffon midnight -almond antique sky peru orange -almond antique metallic orange dim -almond aquamarine rose maroon antique -almond antique blue firebrick mint -almond antique forest lavender goldenrod -almond aquamarine dodger light gainsboro -almond aquamarine pink moccasin thistle -almond aquamarine yellow dodger mint -almond antique violet turquoise frosted -almond antique burnished rose metallic -almond antique chartreuse khaki white -almond antique misty red olive -almond antique violet mint lemon -almond antique medium spring khaki -almond antique chartreuse lavender yellow -almond aquamarine burnished black steel -almond aquamarine midnight light salmon -almond aquamarine sandy cyan gainsboro -almond antique violet chocolate turquoise -almond antique burnished rose metallic -almond azure aquamarine papaya violet -almond aquamarine floral ivory bisque -almond azure blanched chiffon midnight -almond antique olive coral navajo -almond antique gainsboro frosted violet -almond antique salmon chartreuse burlywood -almond antique gainsboro frosted violet -almond antique blue firebrick mint -almond aquamarine pink moccasin thistle -almond antique violet turquoise frosted -almond antique sky peru orange -almond antique 
violet mint lemon -almond aquamarine burnished black steel -almond aquamarine midnight light salmon -almond antique olive coral navajo -almond azure blanched chiffon midnight -almond antique salmon chartreuse burlywood -almond aquamarine rose maroon antique -almond azure aquamarine papaya violet -almond aquamarine floral ivory bisque -almond antique chartreuse lavender yellow -almond aquamarine sandy cyan gainsboro -almond antique misty red olive -almond antique medium spring khaki -almond antique burnished rose metallic -almond antique chartreuse khaki white -almond aquamarine dodger light gainsboro -almond aquamarine yellow dodger mint -almond antique burnished rose metallic -almond antique forest lavender goldenrod -almond antique metallic orange dim -almond antique violet chocolate turquoise -almond antique burnished rose metallic -almond azure blanched chiffon midnight -almond antique olive coral navajo -almond aquamarine floral ivory bisque -almond azure aquamarine papaya violet -almond aquamarine midnight light salmon -almond aquamarine sandy cyan gainsboro -almond antique chartreuse lavender yellow -almond aquamarine burnished black steel -almond antique gainsboro frosted violet -almond antique medium spring khaki -almond antique misty red olive -almond antique violet mint lemon -almond antique sky peru orange -almond antique chartreuse khaki white -almond antique violet chocolate turquoise -almond antique burnished rose metallic -almond antique violet turquoise frosted -almond aquamarine yellow dodger mint -almond aquamarine pink moccasin thistle -almond aquamarine dodger light gainsboro -almond antique forest lavender goldenrod -almond antique blue firebrick mint -almond antique metallic orange dim -almond antique salmon chartreuse burlywood -almond aquamarine rose maroon antique -almond antique burnished rose metallic -almond aquamarine midnight light salmon -almond antique sky peru orange -almond antique salmon chartreuse burlywood -almond antique burnished rose metallic -almond aquamarine yellow dodger mint -almond antique metallic orange dim -almond antique chartreuse lavender yellow -almond antique violet turquoise frosted -almond aquamarine sandy cyan gainsboro -almond antique medium spring khaki -almond antique violet chocolate turquoise -almond antique olive coral navajo -almond aquamarine rose maroon antique -almond aquamarine burnished black steel -almond antique misty red olive -almond aquamarine pink moccasin thistle -almond aquamarine floral ivory bisque -almond antique gainsboro frosted violet -almond antique forest lavender goldenrod -almond aquamarine dodger light gainsboro -almond azure aquamarine papaya violet -almond azure blanched chiffon midnight -almond antique violet mint lemon -almond antique blue firebrick mint -almond antique chartreuse khaki white -almond azure blanched chiffon midnight -almond antique metallic orange dim -almond aquamarine pink moccasin thistle -almond aquamarine yellow dodger mint -almond aquamarine rose maroon antique -almond antique burnished rose metallic -almond antique burnished rose metallic -almond antique salmon chartreuse burlywood -almond aquamarine floral ivory bisque -almond aquamarine burnished black steel -almond antique medium spring khaki -almond antique chartreuse lavender yellow -almond aquamarine midnight light salmon -almond antique gainsboro frosted violet -almond aquamarine sandy cyan gainsboro -almond azure aquamarine papaya violet -almond aquamarine dodger light gainsboro -almond antique violet turquoise 
frosted -almond antique forest lavender goldenrod -almond antique chartreuse khaki white -almond antique misty red olive -almond antique olive coral navajo -almond antique violet mint lemon -almond antique blue firebrick mint -almond antique violet chocolate turquoise -almond antique sky peru orange -almond aquamarine sandy cyan gainsboro -almond aquamarine floral ivory bisque -almond antique chartreuse lavender yellow -almond antique metallic orange dim -almond aquamarine dodger light gainsboro -almond antique burnished rose metallic -almond aquamarine pink moccasin thistle -almond aquamarine rose maroon antique -almond azure aquamarine papaya violet -almond antique chartreuse khaki white -almond aquamarine yellow dodger mint -almond antique olive coral navajo -almond antique violet chocolate turquoise -almond antique salmon chartreuse burlywood -almond antique blue firebrick mint -almond antique forest lavender goldenrod -almond antique sky peru orange -almond aquamarine burnished black steel -almond antique burnished rose metallic -almond azure blanched chiffon midnight -almond antique medium spring khaki -almond antique misty red olive -almond antique violet turquoise frosted -almond antique gainsboro frosted violet -almond aquamarine midnight light salmon -almond antique violet mint lemon -almond antique burnished rose metallic -almond aquamarine midnight light salmon -almond antique chartreuse lavender yellow -almond aquamarine sandy cyan gainsboro -almond aquamarine burnished black steel -almond antique violet mint lemon -almond antique misty red olive -almond antique medium spring khaki -almond antique salmon chartreuse burlywood -almond antique sky peru orange -almond antique burnished rose metallic -almond antique chartreuse khaki white -almond antique violet turquoise frosted -almond aquamarine yellow dodger mint -almond aquamarine dodger light gainsboro -almond antique violet chocolate turquoise -almond aquamarine pink moccasin thistle -almond antique blue firebrick mint -almond aquamarine rose maroon antique -almond antique forest lavender goldenrod -almond azure aquamarine papaya violet -almond antique gainsboro frosted violet -almond antique metallic orange dim -almond azure blanched chiffon midnight -almond aquamarine floral ivory bisque -almond antique olive coral navajo -almond aquamarine rose maroon antique -almond antique olive coral navajo -almond antique violet chocolate turquoise -almond antique chartreuse khaki white -almond antique violet mint lemon -almond aquamarine midnight light salmon -almond azure blanched chiffon midnight -almond antique blue firebrick mint -almond antique burnished rose metallic -almond aquamarine dodger light gainsboro -almond antique misty red olive -almond aquamarine floral ivory bisque -almond antique metallic orange dim -almond aquamarine yellow dodger mint -almond antique medium spring khaki -almond antique chartreuse lavender yellow -almond aquamarine pink moccasin thistle -almond aquamarine sandy cyan gainsboro -almond antique salmon chartreuse burlywood -almond antique violet turquoise frosted -almond antique burnished rose metallic -almond antique forest lavender goldenrod -almond antique gainsboro frosted violet -almond aquamarine burnished black steel -almond azure aquamarine papaya violet -almond antique sky peru orange -almond aquamarine floral ivory bisque -almond antique chartreuse khaki white -almond antique misty red olive -almond aquamarine burnished black steel -almond antique sky peru orange -almond aquamarine dodger 
light gainsboro -almond antique violet mint lemon -almond antique medium spring khaki -almond antique forest lavender goldenrod -almond antique metallic orange dim -almond antique burnished rose metallic -almond antique gainsboro frosted violet -almond azure aquamarine papaya violet -almond antique blue firebrick mint -almond antique chartreuse lavender yellow -almond antique olive coral navajo -almond antique violet chocolate turquoise -almond azure blanched chiffon midnight -almond aquamarine midnight light salmon -almond antique violet turquoise frosted -almond aquamarine yellow dodger mint -almond aquamarine pink moccasin thistle -almond antique burnished rose metallic -almond aquamarine rose maroon antique -almond aquamarine sandy cyan gainsboro -almond antique salmon chartreuse burlywood diff --git ql/src/test/results/clientpositive/subquery_notin_having.q.out ql/src/test/results/clientpositive/subquery_notin_having.q.out index 644e70e..72a72f9 100644 --- ql/src/test/results/clientpositive/subquery_notin_having.q.out +++ ql/src/test/results/clientpositive/subquery_notin_having.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select key, count(*) from src @@ -172,32 +172,32 @@ STAGE PLANS: Filter Operator predicate: (key > '12') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string) - mode: hash + Select Operator + expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string), true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1028,8 +1028,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE 
Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + key expressions: _col0 (type: string), true (type: boolean) + sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double) @@ -1048,20 +1048,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string) + keys: _col0 (type: string), true (type: boolean) mode: complete - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1127,7 +1123,7 @@ POSTHOOK: query: INSERT INTO t2 VALUES (null), (2), (100) POSTHOOK: type: QUERY POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.c1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2) PREHOOK: type: QUERY POSTHOOK: query: explain SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2) @@ -1284,34 +1280,30 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int) - outputColumnNames: c1 + outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: c1 (type: int) + keys: _col0 (type: int), true (type: boolean) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE - File 
Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1319,7 +1311,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT c1 FROM t1 group by c1 having c1 NOT IN (SELECT c1 FROM t2) PREHOOK: type: QUERY PREHOOK: Input: default@t1 diff --git ql/src/test/results/clientpositive/tez/explainuser_3.q.out ql/src/test/results/clientpositive/tez/explainuser_3.q.out index 17c9ec3..fbf61ef 100644 --- ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -16,14 +16,6 @@ POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@acid_vectorized POSTHOOK: Lineage: acid_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] -PREHOOK: query: analyze table acid_vectorized compute statistics for columns -PREHOOK: type: QUERY -PREHOOK: Input: default@acid_vectorized -#### A masked pattern was here #### -POSTHOOK: query: analyze table acid_vectorized compute statistics for columns -POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid_vectorized -#### A masked pattern was here #### PREHOOK: query: explain select a, b from acid_vectorized order by a, b PREHOOK: type: QUERY POSTHOOK: query: explain select a, b from acid_vectorized order by a, b @@ -39,14 +31,14 @@ Stage-0 Stage-1 Reducer 2 vectorized File Output Operator [FS_8] - Select Operator [SEL_7] (rows=16 width=101) + Select Operator [SEL_7] (rows=16 width=106) Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_6] - Select Operator [SEL_5] (rows=16 width=101) + Select Operator [SEL_5] (rows=16 width=106) Output:["_col0","_col1"] - TableScan [TS_0] (rows=16 width=101) - default@acid_vectorized,acid_vectorized, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"] + TableScan [TS_0] (rows=16 width=106) + default@acid_vectorized,acid_vectorized, ACID table,Tbl:COMPLETE,Col:NONE,Output:["a","b"] PREHOOK: query: explain select key, value FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol @@ -729,7 +721,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 2 - Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) diff --git ql/src/test/results/clientpositive/unset_table_view_property.q.out ql/src/test/results/clientpositive/unset_table_view_property.q.out index d28d800..f9c9697 100644 --- ql/src/test/results/clientpositive/unset_table_view_property.q.out +++ ql/src/test/results/clientpositive/unset_table_view_property.q.out @@ -34,7 +34,6 @@ PREHOOK: query: SHOW TBLPROPERTIES 
vt.testTable PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: SHOW TBLPROPERTIES vt.testTable POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} a 1 c 3 #### A masked pattern was here #### @@ -55,7 +54,6 @@ PREHOOK: query: SHOW TBLPROPERTIES vt.testTable PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: SHOW TBLPROPERTIES vt.testTable POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -74,7 +72,6 @@ PREHOOK: query: SHOW TBLPROPERTIES vt.testTable PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: SHOW TBLPROPERTIES vt.testTable POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} a 1 c 3 d 4 @@ -96,7 +93,6 @@ PREHOOK: query: SHOW TBLPROPERTIES vt.testTable PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: SHOW TBLPROPERTIES vt.testTable POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} c 3 #### A masked pattern was here #### numFiles 0 @@ -116,7 +112,6 @@ PREHOOK: query: SHOW TBLPROPERTIES vt.testTable PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: SHOW TBLPROPERTIES vt.testTable POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -135,7 +130,6 @@ PREHOOK: query: SHOW TBLPROPERTIES vt.testTable PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: SHOW TBLPROPERTIES vt.testTable POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} a 1 b 2 c 3 @@ -158,7 +152,6 @@ PREHOOK: query: SHOW TBLPROPERTIES vt.testTable PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: SHOW TBLPROPERTIES vt.testTable POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} a 1 c 3 #### A masked pattern was here #### @@ -179,7 +172,6 @@ PREHOOK: query: SHOW TBLPROPERTIES vt.testTable PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: SHOW TBLPROPERTIES vt.testTable POSTHOOK: type: SHOW_TBLPROPERTIES -COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} a 1 #### A masked pattern was here #### numFiles 0 diff --git ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out index dead5a6..850278e 100644 --- ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out @@ -166,7 +166,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Execution mode: vectorized Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out index 7820831..1652996 100644 --- ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[34][bigTable=?] in task 'Stage-8:MAPRED' is a cross product +Warning: Map Join MAPJOIN[35][bigTable=?] 
in task 'Stage-8:MAPRED' is a cross product PREHOOK: query: explain select * from src @@ -214,34 +214,30 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: key (type: string) + keys: _col0 (type: string), true (type: boolean) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) mode: mergepartial - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -249,7 +245,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[34][bigTable=?] in task 'Stage-8:MAPRED' is a cross product +Warning: Map Join MAPJOIN[35][bigTable=?] 
in task 'Stage-8:MAPRED' is a cross product PREHOOK: query: select * from src where not key in diff --git ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out index eaaf3e9..1dec224 100644 --- ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out @@ -68,7 +68,7 @@ STAGE PLANS: alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) @@ -227,7 +227,7 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + predicate: (l_shipmode = 'AIR') (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java index 264335c..a597fd7 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java @@ -583,10 +583,6 @@ public boolean readField(int fieldIndex) throws IOException { // U+FFFD will throw this as well logExceptionMessage(bytes, fieldStart, fieldLength, primitiveCategories[fieldIndex]); return false; - } catch (IllegalArgumentException iae) { - // E.g. 
can be thrown by Date.valueOf - logExceptionMessage(bytes, fieldStart, fieldLength, primitiveCategories[fieldIndex]); - return false; } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java index f4ac56f..f8a110d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java @@ -402,12 +402,10 @@ public static int writeVLongToByteArray(byte[] bytes, int offset, long l) { return 1 + len; } - public static int VLONG_BYTES_LEN = 9; - private static ThreadLocal vLongBytesThreadLocal = new ThreadLocal() { @Override public byte[] initialValue() { - return new byte[VLONG_BYTES_LEN]; + return new byte[9]; } }; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java index 085d71c..6bc4622 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java @@ -60,13 +60,11 @@ private HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable; private HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable; private HiveIntervalDayTime hiveIntervalDayTime; - private byte[] vLongBytes; private long[] scratchLongs; private byte[] scratchBuffer; public LazyBinarySerializeWrite(int fieldCount) { this(); - vLongBytes = new byte[LazyBinaryUtils.VLONG_BYTES_LEN]; this.fieldCount = fieldCount; } @@ -272,7 +270,7 @@ public void writeInt(int v) throws IOException { // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); - writeVInt(v); + LazyBinaryUtils.writeVInt(output, v); fieldIndex++; @@ -303,7 +301,7 @@ public void writeLong(long v) throws IOException { // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); - writeVLong(v); + LazyBinaryUtils.writeVLong(output, v); fieldIndex++; @@ -404,7 +402,7 @@ public void writeString(byte[] v) throws IOException { nullByte |= 1 << (fieldIndex % 8); int length = v.length; - writeVInt(length); + LazyBinaryUtils.writeVInt(output, length); output.write(v, 0, length); @@ -434,7 +432,7 @@ public void writeString(byte[] v, int start, int length) throws IOException { // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); - writeVInt(length); + LazyBinaryUtils.writeVInt(output, length); output.write(v, start, length); @@ -500,7 +498,7 @@ public void writeDate(Date date) throws IOException { // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); - writeVInt(DateWritable.dateToDays(date)); + LazyBinaryUtils.writeVInt(output, DateWritable.dateToDays(date)); fieldIndex++; @@ -529,7 +527,7 @@ public void writeDate(int dateAsDays) throws IOException { // Set bit in NULL byte when a field is NOT NULL. 
nullByte |= 1 << (fieldIndex % 8); - writeVInt(dateAsDays); + LazyBinaryUtils.writeVInt(output, dateAsDays); fieldIndex++; @@ -753,18 +751,4 @@ public void writeHiveDecimal(HiveDecimalWritable decWritable, int scale) throws output.writeByte(nullOffset, nullByte); } } - - /* - * Write a VInt using our temporary byte buffer instead of paying the thread local performance - * cost of LazyBinaryUtils.writeVInt - */ - private void writeVInt(int v) { - final int len = LazyBinaryUtils.writeVLongToByteArray(vLongBytes, v); - output.write(vLongBytes, 0, len); - } - - private void writeVLong(long v) { - final int len = LazyBinaryUtils.writeVLongToByteArray(vLongBytes, v); - output.write(vLongBytes, 0, len); - } } diff --git service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java index a652756..e9f619a 100644 --- service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -605,7 +605,7 @@ private AbstractSerDe getSerDe() throws SQLException { LOG.debug("Column types: " + types); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types); } - SerDeUtils.initializeSerDe(serde, queryState.getConf(), props, null); + SerDeUtils.initializeSerDe(serde, new HiveConf(), props, null); } catch (Exception ex) { ex.printStackTrace(); diff --git service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp index 33797ed..6e89981 100644 --- service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp +++ service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp @@ -193,7 +193,7 @@ for (HiveSession hiveSession: hiveSessions) { <%= operation.getState() %> <%= operation.getElapsedTime()/1000 %> <%= operation.getEndTime() == null ? "In Progress" : new Date(operation.getEndTime()) %> - <%= operation.getRuntime() == null ? 
"n/a" : operation.getRuntime()/1000 %> + <%= operation.getRuntime()/1000 %> <% String link = "/query_page?operationId=" + operation.getOperationId(); %> >Drilldown diff --git service/src/test/org/apache/hive/service/cli/CLIServiceTest.java service/src/test/org/apache/hive/service/cli/CLIServiceTest.java index f325dbc..237fcc0 100644 --- service/src/test/org/apache/hive/service/cli/CLIServiceTest.java +++ service/src/test/org/apache/hive/service/cli/CLIServiceTest.java @@ -665,8 +665,8 @@ public void testTaskStatus() throws Exception { ByteArrayInputStream in = new ByteArrayInputStream(jsonTaskStatus.getBytes("UTF-8")); List taskStatuses = mapper.readValue(in, new TypeReference>(){}); - System.out.println("task statuses: " + jsonTaskStatus); // TaskDisplay doesn't have a toString, using json checkTaskStatuses(taskStatuses); + System.out.println("task statuses: " + jsonTaskStatus); // TaskDisplay doesn't have a toString, using json if (OperationState.CANCELED == state || state == OperationState.CLOSED || state == OperationState.FINISHED || state == OperationState.ERROR) { @@ -693,6 +693,9 @@ private void checkTaskStatuses(List taskDisplays) { assertNull(taskDisplay.getReturnValue()); break; case RUNNING: + if (taskDisplay.getTaskType() == StageType.MAPRED || taskDisplay.getTaskType() == StageType.MAPREDLOCAL) { + assertNotNull(taskDisplay.getExternalHandle()); + } assertNotNull(taskDisplay.getBeginTime()); assertNull(taskDisplay.getEndTime()); assertNotNull(taskDisplay.getElapsedTime()); diff --git spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java index d4b63f0..0da40dd 100644 --- spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java +++ spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java @@ -335,7 +335,7 @@ public void run() { Preconditions.checkArgument(master != null, "spark.master is not defined."); String deployMode = conf.get("spark.submit.deployMode"); - List argv = Lists.newLinkedList(); + List argv = Lists.newArrayList(); if (sparkHome != null) { argv.add(new File(sparkHome, "bin/spark-submit").getAbsolutePath()); @@ -376,6 +376,16 @@ public void run() { argv.add("org.apache.spark.deploy.SparkSubmit"); } + if ("kerberos".equals(hiveConf.get(HADOOP_SECURITY_AUTHENTICATION))) { + String principal = SecurityUtil.getServerPrincipal(hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL), + "0.0.0.0"); + String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB); + argv.add("--principal"); + argv.add(principal); + argv.add("--keytab"); + argv.add(keyTabFile); + } + if (SparkClientUtilities.isYarnClusterMode(master, deployMode)) { String executorCores = conf.get("spark.executor.cores"); if (executorCores != null) { @@ -395,34 +405,6 @@ public void run() { argv.add(numOfExecutors); } } - // The options --principal/--keypad do not work with --proxy-user in spark-submit.sh - // (see HIVE-15485, SPARK-5493, SPARK-19143), so Hive could only support doAs or - // delegation token renewal, but not both. Since doAs is a more common case, if both - // are needed, we choose to favor doAs. So when doAs is enabled, we use kinit command, - // otherwise, we pass the principal/keypad to spark to support the token renewal for - // long-running application. 
- if ("kerberos".equals(hiveConf.get(HADOOP_SECURITY_AUTHENTICATION))) { - String principal = SecurityUtil.getServerPrincipal(hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL), - "0.0.0.0"); - String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB); - if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS)) { - List kinitArgv = Lists.newLinkedList(); - kinitArgv.add("kinit"); - kinitArgv.add(principal); - kinitArgv.add("-k"); - kinitArgv.add("-t"); - kinitArgv.add(keyTabFile + ";"); - kinitArgv.addAll(argv); - argv = kinitArgv; - } else { - // if doAs is not enabled, we pass the principal/keypad to spark-submit in order to - // support the possible delegation token renewal in Spark - argv.add("--principal"); - argv.add(principal); - argv.add("--keytab"); - argv.add(keyTabFile); - } - } if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS)) { try { String currentUser = Utils.getUGI().getShortUserName(); diff --git storage-api/src/java/org/apache/hive/common/util/BloomFilter.java storage-api/src/java/org/apache/hive/common/util/BloomFilter.java index e9f419d..d44bba8 100644 --- storage-api/src/java/org/apache/hive/common/util/BloomFilter.java +++ storage-api/src/java/org/apache/hive/common/util/BloomFilter.java @@ -292,42 +292,6 @@ public static BloomFilter deserialize(InputStream in) throws IOException { } } - // Given a byte array consisting of a serialized BloomFilter, gives the offset (from 0) - // for the start of the serialized long values that make up the bitset. - // NumHashFunctions (1 byte) + NumBits (4 bytes) - public static final int START_OF_SERIALIZED_LONGS = 5; - - /** - * Merges BloomFilter bf2 into bf1. - * Assumes 2 BloomFilters with the same size/hash functions are serialized to byte arrays - * @param bf1Bytes - * @param bf1Start - * @param bf1Length - * @param bf2Bytes - * @param bf2Start - * @param bf2Length - */ - public static void mergeBloomFilterBytes( - byte[] bf1Bytes, int bf1Start, int bf1Length, - byte[] bf2Bytes, int bf2Start, int bf2Length) { - if (bf1Length != bf2Length) { - throw new IllegalArgumentException("bf1Length " + bf1Length + " does not match bf2Length " + bf2Length); - } - - // Validation on the bitset size/3 hash functions. - for (int idx = 0; idx < START_OF_SERIALIZED_LONGS; ++idx) { - if (bf1Bytes[bf1Start + idx] != bf2Bytes[bf2Start + idx]) { - throw new IllegalArgumentException("bf1 NumHashFunctions/NumBits does not match bf2"); - } - } - - // Just bitwise-OR the bits together - size/# functions should be the same, - // rest of the data is serialized long values for the bitset which are supposed to be bitwise-ORed. - for (int idx = START_OF_SERIALIZED_LONGS; idx < bf1Length; ++idx) { - bf1Bytes[bf1Start + idx] |= bf2Bytes[bf2Start + idx]; - } - } - /** * Bare metal bit set implementation. For performance reasons, this implementation does not check * for index bounds nor expand the bit set size if the specified index is greater than the size. 
diff --git storage-api/src/test/org/apache/hive/common/util/TestBloomFilter.java storage-api/src/test/org/apache/hive/common/util/TestBloomFilter.java index e4ee93a..63c7050 100644 --- storage-api/src/test/org/apache/hive/common/util/TestBloomFilter.java +++ storage-api/src/test/org/apache/hive/common/util/TestBloomFilter.java @@ -20,12 +20,8 @@ import static org.junit.Assert.assertEquals; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.util.ArrayList; import java.util.Random; -import org.junit.Assert; import org.junit.Test; /** @@ -465,125 +461,4 @@ public void testMerge() { assertEquals(true, bf.testString(v2)); assertEquals(true, bf.testString(v3)); } - - @Test - public void testSerialize() throws Exception { - BloomFilter bf1 = new BloomFilter(10000); - String[] inputs = { - "bloo", - "bloom fil", - "bloom filter", - "cuckoo filter", - }; - - for (String val : inputs) { - bf1.addString(val); - } - - // Serialize/deserialize - ByteArrayOutputStream bytesOut = new ByteArrayOutputStream(); - BloomFilter.serialize(bytesOut, bf1); - ByteArrayInputStream bytesIn = new ByteArrayInputStream(bytesOut.toByteArray()); - BloomFilter bf2 = BloomFilter.deserialize(bytesIn); - - for (String val : inputs) { - assertEquals("Testing bf1 with " + val, true, bf1.testString(val)); - assertEquals("Testing bf2 with " + val, true, bf2.testString(val)); - } - } - - @Test - public void testMergeBloomFilterBytes() throws Exception { - BloomFilter bf1 = new BloomFilter(10000); - BloomFilter bf2 = new BloomFilter(10000); - - String[] inputs1 = { - "bloo", - "bloom fil", - "bloom filter", - "cuckoo filter", - }; - - String[] inputs2 = { - "2_bloo", - "2_bloom fil", - "2_bloom filter", - "2_cuckoo filter", - }; - - for (String val : inputs1) { - bf1.addString(val); - } - for (String val : inputs2) { - bf2.addString(val); - } - - ByteArrayOutputStream bytesOut = new ByteArrayOutputStream(); - BloomFilter.serialize(bytesOut, bf1); - byte[] bf1Bytes = bytesOut.toByteArray(); - bytesOut.reset(); - BloomFilter.serialize(bytesOut, bf1); - byte[] bf2Bytes = bytesOut.toByteArray(); - - // Merge bytes - BloomFilter.mergeBloomFilterBytes( - bf1Bytes, 0, bf1Bytes.length, - bf2Bytes, 0, bf2Bytes.length); - - // Deserialize and test - ByteArrayInputStream bytesIn = new ByteArrayInputStream(bf1Bytes, 0, bf1Bytes.length); - BloomFilter bfMerged = BloomFilter.deserialize(bytesIn); - // All values should pass test - for (String val : inputs1) { - bfMerged.addString(val); - } - for (String val : inputs2) { - bfMerged.addString(val); - } - } - - @Test - public void testMergeBloomFilterBytesFailureCases() throws Exception { - BloomFilter bf1 = new BloomFilter(1000); - BloomFilter bf2 = new BloomFilter(200); - // Create bloom filter with same number of bits, but different # hash functions - ArrayList bits = new ArrayList(); - for (int idx = 0; idx < bf1.getBitSet().length; ++idx) { - bits.add(0L); - } - BloomFilter bf3 = new BloomFilter(bits, bf1.getBitSize(), bf1.getNumHashFunctions() + 1); - - // Serialize to bytes - ByteArrayOutputStream bytesOut = new ByteArrayOutputStream(); - BloomFilter.serialize(bytesOut, bf1); - byte[] bf1Bytes = bytesOut.toByteArray(); - - bytesOut.reset(); - BloomFilter.serialize(bytesOut, bf2); - byte[] bf2Bytes = bytesOut.toByteArray(); - - bytesOut.reset(); - BloomFilter.serialize(bytesOut, bf3); - byte[] bf3Bytes = bytesOut.toByteArray(); - - try { - // this should fail - BloomFilter.mergeBloomFilterBytes( - bf1Bytes, 0, bf1Bytes.length, - bf2Bytes, 0, 
bf2Bytes.length); - Assert.fail("Expected exception not encountered"); - } catch (IllegalArgumentException err) { - // expected - } - - try { - // this should fail - BloomFilter.mergeBloomFilterBytes( - bf1Bytes, 0, bf1Bytes.length, - bf3Bytes, 0, bf3Bytes.length); - Assert.fail("Expected exception not encountered"); - } catch (IllegalArgumentException err) { - // expected - } - } }
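As an aside, a compact round-trip check in the spirit of the removed testSerialize, using only BloomFilter APIs that remain visible elsewhere in this patch (addString, testString, serialize, deserialize). It is a sketch for local experimentation, not part of the change.

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.hive.common.util.BloomFilter;

public class BloomFilterRoundTripCheck {
  public static void main(String[] args) throws Exception {
    // Build a filter sized for ~10k entries and add a few strings.
    BloomFilter original = new BloomFilter(10000);
    String[] inputs = { "bloo", "bloom fil", "bloom filter", "cuckoo filter" };
    for (String val : inputs) {
      original.addString(val);
    }

    // Serialize to a byte array and read it back through the stream helpers.
    ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
    BloomFilter.serialize(bytesOut, original);
    BloomFilter restored =
        BloomFilter.deserialize(new ByteArrayInputStream(bytesOut.toByteArray()));

    // Every value added before serialization must still test positive afterwards.
    for (String val : inputs) {
      if (!restored.testString(val)) {
        throw new AssertionError("round-trip lost value: " + val);
      }
    }
    System.out.println("round-trip OK for " + inputs.length + " values");
  }
}
```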