diff --git .travis.yml .travis.yml index bd002b9..d53cfc1 100644 --- .travis.yml +++ .travis.yml @@ -27,9 +27,10 @@ language: java jdk: - oraclejdk8 -cache: - directories: - - $HOME/.m2 +# disabling cache for /home/travis/.m2/repository/org/apache/hive/hive-jdbc/3.0.0-SNAPSHOT/hive-jdbc-3.0.0-SNAPSHOT-standalone.jar (Permission denied) +#cache: +# directories: +# - $HOME/.m2 env: MAVEN_SKIP_RC=true diff --git common/src/java/org/apache/hadoop/hive/common/metrics/LegacyMetrics.java common/src/java/org/apache/hadoop/hive/common/metrics/LegacyMetrics.java index effe26b..d05c728 100644 --- common/src/java/org/apache/hadoop/hive/common/metrics/LegacyMetrics.java +++ common/src/java/org/apache/hadoop/hive/common/metrics/LegacyMetrics.java @@ -226,6 +226,11 @@ public void addGauge(String name, MetricsVariable variable) { } @Override + public void removeGauge(String name) { + //This implementation completely and exhaustively reverses the addGauge method above. + } + + @Override public void addRatio(String name, MetricsVariable numerator, MetricsVariable denominator) { //Not implemented diff --git common/src/java/org/apache/hadoop/hive/common/metrics/common/Metrics.java common/src/java/org/apache/hadoop/hive/common/metrics/common/Metrics.java index 88c513b..99d3e57 100644 --- common/src/java/org/apache/hadoop/hive/common/metrics/common/Metrics.java +++ common/src/java/org/apache/hadoop/hive/common/metrics/common/Metrics.java @@ -92,7 +92,15 @@ * @param name name of gauge * @param variable variable to track. */ - public void addGauge(String name, final MetricsVariable variable); + public void addGauge(String name, final MetricsVariable variable); + + + /** + * Removed the gauge added by addGauge. + * @param name name of gauge + */ + public void removeGauge(String name); + /** * Add a ratio metric to track the correlation between two variables diff --git common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/CodahaleMetrics.java common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/CodahaleMetrics.java index a43b09d..4f35a6d 100644 --- common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/CodahaleMetrics.java +++ common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/CodahaleMetrics.java @@ -294,6 +294,21 @@ public Object getValue() { addGaugeInternal(name, gauge); } + + @Override + public void removeGauge(String name) { + try { + gaugesLock.lock(); + gauges.remove(name); + // Metrics throws an Exception if we don't do this when the key already exists + if (metricRegistry.getGauges().containsKey(name)) { + metricRegistry.remove(name); + } + } finally { + gaugesLock.unlock(); + } + } + @Override public void addRatio(String name, MetricsVariable numerator, MetricsVariable denominator) { @@ -409,6 +424,7 @@ private boolean initCodahaleMetricsReporterClasses() { throw new IllegalArgumentException(e); } try { + // Note: Hadoop metric reporter does not support tags. We create a single reporter for all metrics. 
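The new `removeGauge` method above is the teardown counterpart of `addGauge`: it lets a gauge registered for a transient owner (such as a WM pool) be unregistered when that owner goes away. A minimal usage sketch, not part of the patch; the class and gauge name are made up for illustration:

```java
// Minimal sketch: register a gauge for a transient owner and unregister it on teardown
// via the new removeGauge(). Names are illustrative only.
import org.apache.hadoop.hive.common.metrics.common.Metrics;
import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
import org.apache.hadoop.hive.common.metrics.common.MetricsVariable;

public class GaugeLifecycleSketch {
  private static final String GAUGE_NAME = "WM_examplePool_numRunningQueries"; // hypothetical name

  public void start(MetricsVariable<Integer> runningQueries) {
    Metrics metrics = MetricsFactory.getInstance();
    if (metrics != null) {
      metrics.addGauge(GAUGE_NAME, runningQueries);
    }
  }

  public void stop() {
    Metrics metrics = MetricsFactory.getInstance();
    if (metrics != null) {
      metrics.removeGauge(GAUGE_NAME); // previously there was no way to unregister a gauge
    }
  }
}
```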
Constructor constructor = name.getConstructor(MetricRegistry.class, HiveConf.class); CodahaleReporter reporter = (CodahaleReporter) constructor.newInstance(metricRegistry, conf); reporter.start(); diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index a6866e7..697b194 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1162,6 +1162,13 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal // materialized views HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING("hive.materializedview.rewriting", false, "Whether to try to rewrite queries using the materialized views enabled for rewriting"), + HIVE_MATERIALIZED_VIEW_REWRITING_SELECTION_STRATEGY("hive.materializedview.rewriting.strategy", "heuristic", + new StringSet("heuristic", "costbased"), + "The strategy that should be used to cost and select the materialized view rewriting. \n" + + " heuristic: Always try to select the plan using the materialized view if rewriting produced one," + + "choosing the plan with lower cost among possible plans containing a materialized view\n" + + " costbased: Fully cost-based strategy, always use plan with lower cost, independently on whether " + + "it uses a materialized view or not"), HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW("hive.materializedview.rewriting.time.window", "0s", new TimeValidator(TimeUnit.SECONDS), "Time window, specified in seconds, after which outdated materialized views become invalid for automatic query rewriting.\n" + "For instance, if a materialized view is created and afterwards one of its source tables is changed at " + @@ -2486,6 +2493,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Applies when a user specifies a target WM pool in the JDBC connection string. If\n" + "false, the user can only specify a pool he is mapped to (e.g. 
make a choice among\n" + "multiple group mappings); if true, the user can specify any existing pool."), + HIVE_SERVER2_WM_POOL_METRICS("hive.server2.wm.pool.metrics", true, + "Whether per-pool WM metrics should be enabled."), HIVE_SERVER2_TEZ_WM_AM_REGISTRY_TIMEOUT("hive.server2.tez.wm.am.registry.timeout", "30s", new TimeValidator(TimeUnit.SECONDS), "The timeout for AM registry registration, after which (on attempting to use the\n" + diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java index abc856b..eca407e 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java @@ -23,6 +23,7 @@ import com.google.common.base.Supplier; import com.google.common.base.Suppliers; import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -57,6 +58,7 @@ import org.apache.hadoop.hive.metastore.DefaultHiveMetaHook; import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; @@ -103,6 +105,8 @@ private static final HttpClient HTTP_CLIENT; + private static List allowedAlterTypes = ImmutableList.of("ADDPROPS", "DROPPROPS", "ADDCOLS"); + static { final Lifecycle lifecycle = new Lifecycle(); try { @@ -681,4 +685,14 @@ private static HttpClient makeHttpClient(Lifecycle lifecycle) { public static HttpClient getHttpClient() { return HTTP_CLIENT; } + + @Override + public void preAlterTable(Table table, EnvironmentContext context) throws MetaException { + String alterOpType = context.getProperties().get(ALTER_TABLE_OPERATION_TYPE); + // alterOpType is null in case of stats update + if (alterOpType != null && !allowedAlterTypes.contains(alterOpType)) { + throw new MetaException( + "ALTER TABLE can not be used for " + alterOpType + " to a non-native table "); + } + } } diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java index 9b8a26e..d991adb 100644 --- druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java +++ druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.druid.DruidStorageHandler; import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; @@ -486,4 +487,9 @@ public ObjectInspector getObjectInspector() { return inspector; } + @Override + public boolean shouldStoreFieldsInMetastore(Map tableParams) { + // If Druid table is not an external table store the schema in metadata store. 
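For the two configuration properties introduced above (`hive.materializedview.rewriting.strategy` and `hive.server2.wm.pool.metrics`), a small sketch of how they could be set programmatically; the chosen values are for illustration only:

```java
// Sketch only: exercising the new configuration properties from Java.
// "heuristic" and true are the defaults declared above.
import org.apache.hadoop.hive.conf.HiveConf;

public class NewConfVarsSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Select fully cost-based materialized view rewriting instead of the default heuristic.
    conf.setVar(HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_SELECTION_STRATEGY, "costbased");
    // Turn off the new per-pool workload management metrics (they default to enabled).
    conf.setBoolVar(HiveConf.ConfVars.HIVE_SERVER2_WM_POOL_METRICS, false);
    System.out.println(conf.getVar(HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_SELECTION_STRATEGY));
  }
}
```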
+ return !MetaStoreUtils.isExternal(tableParams); + } } diff --git hbase-handler/src/test/results/negative/hbase_ddl.q.out hbase-handler/src/test/results/negative/hbase_ddl.q.out index b5aad70..fde2fc5 100644 --- hbase-handler/src/test/results/negative/hbase_ddl.q.out +++ hbase-handler/src/test/results/negative/hbase_ddl.q.out @@ -26,4 +26,4 @@ key int It is a column key value string It is the column string value #### A masked pattern was here #### -FAILED: SemanticException [Error 10134]: ALTER TABLE can only be used for [ADDPROPS, DROPPROPS] to a non-native table hbase_table_1 +FAILED: SemanticException [Error 10134]: ALTER TABLE can only be used for [ADDPROPS, DROPPROPS, ADDCOLS] to a non-native table hbase_table_1 diff --git itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestTriggersMoveWorkloadManager.java itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestTriggersMoveWorkloadManager.java index ba5ff8e..c21778f 100644 --- itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestTriggersMoveWorkloadManager.java +++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestTriggersMoveWorkloadManager.java @@ -97,7 +97,8 @@ public void testTriggerMoveAndKill() throws Exception { setCmds.add("set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.PostExecWMEventsSummaryPrinter"); List errCaptureExpect = new ArrayList<>(); errCaptureExpect.add("Workload Manager Events Summary"); - errCaptureExpect.add("Event: GET Pool: BI Cluster %: 80.00"); + errCaptureExpect.add("Event: GET Pool: BI"); + // HIVE-19061 introduces UPDATE event which will capture changes to allocation % after GET errCaptureExpect.add("Event: MOVE Pool: ETL Cluster %: 20.00"); errCaptureExpect.add("Event: KILL Pool: null Cluster %: 0.00"); errCaptureExpect.add("Event: RETURN Pool: null Cluster %: 0.00"); @@ -132,7 +133,8 @@ public void testTriggerMoveEscapeKill() throws Exception { setCmds.add("set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.PostExecWMEventsSummaryPrinter"); List errCaptureExpect = new ArrayList<>(); errCaptureExpect.add("Workload Manager Events Summary"); - errCaptureExpect.add("Event: GET Pool: BI Cluster %: 80.00"); + errCaptureExpect.add("Event: GET Pool: BI"); + // HIVE-19061 introduces UPDATE event which will capture changes to allocation % after GET errCaptureExpect.add("Event: MOVE Pool: ETL Cluster %: 20.00"); errCaptureExpect.add("Event: RETURN Pool: null Cluster %: 0.00"); errCaptureExpect.add("\"eventType\" : \"GET\""); @@ -166,7 +168,8 @@ public void testTriggerMoveBackKill() throws Exception { setCmds.add("set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.PostExecWMEventsSummaryPrinter"); List errCaptureExpect = new ArrayList<>(); errCaptureExpect.add("Workload Manager Events Summary"); - errCaptureExpect.add("Event: GET Pool: BI Cluster %: 80.00"); + errCaptureExpect.add("Event: GET Pool: BI"); + // HIVE-19061 introduces UPDATE event which will capture changes to allocation % after GET errCaptureExpect.add("Event: MOVE Pool: ETL Cluster %: 20.00"); errCaptureExpect.add("Event: MOVE Pool: BI Cluster %: 80.00"); errCaptureExpect.add("Event: KILL Pool: null Cluster %: 0.00"); @@ -243,7 +246,8 @@ public void testTriggerMoveConflictKill() throws Exception { setCmds.add("set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.PostExecWMEventsSummaryPrinter"); List errCaptureExpect = new ArrayList<>(); errCaptureExpect.add("Workload Manager Events Summary"); - errCaptureExpect.add("Event: GET Pool: BI Cluster %: 80.00"); + errCaptureExpect.add("Event: GET Pool: 
BI"); + // HIVE-19061 introduces UPDATE event which will capture changes to allocation % after GET errCaptureExpect.add("Event: KILL Pool: null Cluster %: 0.00"); errCaptureExpect.add("Event: RETURN Pool: null Cluster %: 0.00"); errCaptureExpect.add("\"eventType\" : \"GET\""); diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 099b36f..05a1cdd 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -1679,5 +1679,6 @@ druid.query.files=druidmini_test1.q,\ druidmini_dynamic_partition.q,\ druidmini_expressions.q,\ druidmini_extractTime.q,\ + druidmini_test_alter.q,\ druidmini_floorTime.q diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java index 3a2c19a..7451ea4 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java @@ -476,7 +476,7 @@ public void close() throws IOException { isClosed, isInterrupted, pendingError.get(), queue.size()); } LlapIoImpl.LOG.info("Maximum queue length observed " + maxQueueSize); - LlapIoImpl.LOG.info("Llap counters: {}" ,counters); // This is where counters are logged! + LlapIoImpl.LOG.info("Llap counters: {}" , counters); // This is where counters are logged! feedback.stop(); isClosed = true; rethrowErrorIfAny(pendingError.get()); diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 1faa50a..f3e40eb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -457,6 +457,10 @@ HIVE_GROUPING_SETS_SIZE_LIMIT(10411, "Grouping sets size cannot be greater than 64"), REBUILD_NO_MATERIALIZED_VIEW(10412, "Rebuild command only valid for materialized views"), + LOAD_DATA_ACID_FILE(10413, + "\"{0}\" was created created by Acid write - it cannot be loaded into anther Acid table", + true), + //========================== 20000 range starts here ========================// diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 314a186..01cadea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hive.metastore.DefaultHiveMetaHook; import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.PartitionDropOptions; import org.apache.hadoop.hive.metastore.StatObjectConverter; import org.apache.hadoop.hive.metastore.TableType; @@ -3822,10 +3823,12 @@ private int alterTable(Hive db, AlterTableDesc alterTbl) throws HiveException { } try { + EnvironmentContext environmentContext = alterTbl.getEnvironmentContext(); + environmentContext.putToProperties(HiveMetaHook.ALTER_TABLE_OPERATION_TYPE, alterTbl.getOp().name()); if (allPartitions == null) { - db.alterTable(alterTbl.getOldName(), tbl, alterTbl.getIsCascade(), alterTbl.getEnvironmentContext()); + db.alterTable(alterTbl.getOldName(), tbl, alterTbl.getIsCascade(), environmentContext); } else { - db.alterPartitions(Warehouse.getQualifiedName(tbl.getTTable()), allPartitions, 
alterTbl.getEnvironmentContext()); + db.alterPartitions(Warehouse.getQualifiedName(tbl.getTTable()), allPartitions, environmentContext); } // Add constraints if necessary addConstraints(db, alterTbl); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/GuaranteedTasksAllocator.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/GuaranteedTasksAllocator.java index a52928c..6d7fc25 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/GuaranteedTasksAllocator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/GuaranteedTasksAllocator.java @@ -102,13 +102,20 @@ protected int getExecutorCount(boolean allowUpdate) { } @Override - public void updateSessionsAsync(Double totalMaxAlloc, List sessionsToUpdate) { + public int translateAllocationToCpus(double allocation) { + // Do not make a remote call under any circumstances - this is supposed to be async. + return (int)Math.round(getExecutorCount(false) * allocation); + } + + @Override + public int updateSessionsAsync(Double totalMaxAlloc, List sessionsToUpdate) { // Do not make a remote call under any circumstances - this is supposed to be async. int totalCount = getExecutorCount(false); int totalToDistribute = -1; if (totalMaxAlloc != null) { totalToDistribute = (int)Math.round(totalCount * totalMaxAlloc); } + int totalDistributed = 0; double lastDelta = 0; for (int i = 0; i < sessionsToUpdate.size(); ++i) { WmTezSession session = sessionsToUpdate.get(i); @@ -122,6 +129,7 @@ public void updateSessionsAsync(Double totalMaxAlloc, List session // we'd produce 2-2-2-2-0 as we round 1.6; whereas adding the last delta to the next query // we'd round 1.6-1.2-1.8-1.4-2.0 and thus give out 2-1-2-1-2, as intended. // Note that fractions don't have to all be the same like in this example. + assert session.hasClusterFraction(); double fraction = session.getClusterFraction(); double allocation = fraction * totalCount + lastDelta; double roundedAlloc = Math.round(allocation); @@ -139,8 +147,10 @@ public void updateSessionsAsync(Double totalMaxAlloc, List session totalToDistribute -= intAlloc; } // This will only send update if it's necessary. + totalDistributed += intAlloc; updateSessionAsync(session, intAlloc); } + return totalDistributed; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/QueryAllocationManager.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/QueryAllocationManager.java index 9885ce7..32702c0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/QueryAllocationManager.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/QueryAllocationManager.java @@ -32,8 +32,14 @@ * avoid various artifacts, esp. with small numbers and double weirdness. * Null means the total is unknown. * @param sessions Sessions to update based on their allocation fraction. + * @return The number of executors/cpus allocated. */ - void updateSessionsAsync(Double totalMaxAlloc, List sessions); + int updateSessionsAsync(Double totalMaxAlloc, List sessions); + + /** + * @return the number of CPUs equivalent to percentage allocation, for information purposes. + */ + int translateAllocationToCpus(double allocation); /** * Sets a callback to be invoked on cluster changes relevant to resource allocation. 
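The delta-carrying rounding that `GuaranteedTasksAllocator.updateSessionsAsync` performs (see the 1.6 / 2-1-2-1-2 comment above) can be shown in isolation. A standalone sketch assuming five equal sessions on eight total executors:

```java
// Standalone sketch of the rounding scheme described in the comment above: carrying the
// rounding error into the next session keeps the total at 8 (2-1-2-1-2) instead of the
// naive 2-2-2-2-0. The session count and executor total are assumptions for illustration.
public class DeltaCarryRoundingSketch {
  public static void main(String[] args) {
    double[] idealAllocations = {1.6, 1.6, 1.6, 1.6, 1.6}; // fraction * total executor count
    double lastDelta = 0;
    for (double ideal : idealAllocations) {
      double adjusted = ideal + lastDelta;
      int granted = (int) Math.round(adjusted);
      lastDelta = adjusted - granted;
      System.out.print(granted + " "); // prints 2 1 2 1 2
    }
  }
}
```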
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmEvent.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmEvent.java index 33341ad..fae68ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmEvent.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmEvent.java @@ -31,6 +31,7 @@ private static final Logger LOG = LoggerFactory.getLogger(WmEvent.class); enum EventType { GET, // get session + UPDATE, // update session allocation KILL, // kill query DESTROY, // destroy session RESTART, // restart session @@ -51,7 +52,8 @@ WmTezSessionInfo(WmTezSession wmTezSession) { this.poolName = wmTezSession.getPoolName(); this.sessionId = wmTezSession.getSessionId(); - this.clusterPercent = wmTezSession.getClusterFraction() * 100.0; + this.clusterPercent = wmTezSession.hasClusterFraction() + ? wmTezSession.getClusterFraction() * 100.0 : 0; } public String getPoolName() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmPoolMetrics.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmPoolMetrics.java new file mode 100644 index 0000000..19b035e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmPoolMetrics.java @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.tez; + +import java.lang.annotation.Annotation; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.common.metrics.common.Metrics; +import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; +import org.apache.hadoop.hive.common.metrics.common.MetricsVariable; +import org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics; +import org.apache.hadoop.metrics2.MetricsCollector; +import org.apache.hadoop.metrics2.MetricsRecordBuilder; +import org.apache.hadoop.metrics2.MetricsSource; +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.impl.MsInfo; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; +import org.apache.hadoop.metrics2.lib.MutableCounterInt; +import org.apache.hadoop.metrics2.lib.MutableGaugeInt; +import org.apache.hadoop.metrics2.lib.MutableMetric; + +/** + * A wrapper for metrics for single WM pool. This outputs metrics both to Codahale and standard + * Hadoop metrics, in parallel. The codahale output is prefixed with pool name and is mostly + * for the JMX view, or to look at when Hadoop metrics are not set up. Hadoop metrics are output + * because they can be tagged properly rather than prefixed, so they are better for dashboards. 
+ */ +public class WmPoolMetrics implements MetricsSource { + private final String poolName, sourceName; + private MetricsSystem ms; + @SuppressWarnings("unused") // Metrics system will get this via reflection 0_o + private final MetricsRegistry registry; + + // Codahale. We just include the pool name in the counter name. + private List codahaleGaugeNames; + private Map allMetrics; + + @Metric("Number of guaranteed cluster executors given to queries") + MutableGaugeInt numExecutors; + @Metric("Number of guaranteed cluster executors allocated") + MutableGaugeInt numExecutorsMax; + @Metric("Number of parallel queries allowed to run") + MutableGaugeInt numParallelQueries; + @Metric("Number of queries running") + MutableCounterInt numRunningQueries; + @Metric("Number of queries queued") + MutableCounterInt numQueuedQueries; + + // TODO: these would need to be propagated from AM via progress. + // @Metric("Number of allocated guaranteed executors in use"), + // @Metric("Number of speculative executors in use") + + public WmPoolMetrics(String poolName, MetricsSystem ms) { + this.poolName = poolName; + this.sourceName = "WmPoolMetrics." + poolName; + this.ms = ms; + + this.registry = new MetricsRegistry(sourceName); + } + + + public void initAfterRegister() { + // Make sure we capture the same metrics as Hadoop2 metrics system, via annotations. + if (allMetrics != null) return; + allMetrics = new HashMap<>(); + for (Field field : this.getClass().getDeclaredFields()) { + for (Annotation annotation : field.getAnnotations()) { + if (!(annotation instanceof Metric)) continue; + try { + field.setAccessible(true); + allMetrics.put(field.getName(), (MutableMetric) field.get(this)); + } catch (IllegalAccessException ex) { + break; // Not expected, access by the same class. + } + break; + } + } + + // Set up codahale if enabled; we cannot tag the values so just prefix them for the JMX view. + Metrics chMetrics = MetricsFactory.getInstance(); + if (!(chMetrics instanceof CodahaleMetrics)) return; + + List codahaleNames = new ArrayList<>(); + for (Map.Entry e : allMetrics.entrySet()) { + MutableMetric metric = e.getValue(); + MetricsVariable var = null; + if (metric instanceof MutableCounterInt) { + var = new CodahaleCounterWrapper((MutableCounterInt) metric); + } else if (metric instanceof MutableGaugeInt) { + var = new CodahaleGaugeWrapper((MutableGaugeInt) metric); + } + if (var == null) continue; // Unexpected metric type. + String name = "WM_" + poolName + "_" + e.getKey(); + codahaleNames.add(name); + chMetrics.addGauge(name, var); + } + this.codahaleGaugeNames = codahaleNames; + } + + + public void setParallelQueries(int size) { + numParallelQueries.set(size); + } + + public void setExecutors(int allocation) { + numExecutors.set(allocation); + } + + public void setMaxExecutors(int allocation) { + numExecutorsMax.set(allocation); + } + + public void addQueuedQuery() { + numQueuedQueries.incr(); + } + + public void addRunningQuery() { + numRunningQueries.incr(); + } + + public void removeQueuedQueries(int num) { + numQueuedQueries.incr(-num); + } + + public void removeRunningQueries(int num) { + numRunningQueries.incr(-num); + } + + public void moveQueuedToRunning() { + numQueuedQueries.incr(-1); + numRunningQueries.incr(); + } + + @Override + public void getMetrics(MetricsCollector collector, boolean all) { + // We could also have one metricssource for all the pools and add all the pools to the collector + // in its getMetrics call (as separate records). Not clear if that's supported. 
+ // Also, we'd have to initialize the metrics ourselves instead of using @Metric annotation. + MetricsRecordBuilder rb = collector.addRecord("WmPoolMetrics." + poolName) + .setContext("HS2").tag(MsInfo.SessionId, poolName); + if (allMetrics == null) { + initAfterRegister(); // This happens if register calls getMetrics. + } + for (MutableMetric metric : allMetrics.values()) { + metric.snapshot(rb, all); + } + } + + public static WmPoolMetrics create(String poolName, MetricsSystem ms) { + WmPoolMetrics metrics = new WmPoolMetrics(poolName, ms); + metrics = ms.register(metrics.sourceName, "WM " + poolName + " pool metrics", metrics); + metrics.initAfterRegister(); + return metrics; + } + + public void destroy() { + ms.unregisterSource(sourceName); + ms = null; + if (codahaleGaugeNames != null) { + Metrics metrics = MetricsFactory.getInstance(); + for (String chgName : codahaleGaugeNames) { + metrics.removeGauge(chgName); + } + codahaleGaugeNames = null; + } + } + + private static class CodahaleGaugeWrapper implements MetricsVariable { + private final MutableGaugeInt mm; + + public CodahaleGaugeWrapper(MutableGaugeInt mm) { + this.mm = mm; + } + + @Override + public Integer getValue() { + return mm.value(); + } + } + + private static class CodahaleCounterWrapper implements MetricsVariable { + private final MutableCounterInt mm; + + public CodahaleCounterWrapper(MutableCounterInt mm) { + this.mm = mm; + } + + @Override + public Integer getValue() { + return mm.value(); + } + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmTezSession.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmTezSession.java index 1cf5493..fa2b02e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmTezSession.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WmTezSession.java @@ -38,7 +38,7 @@ @JsonProperty("poolName") private String poolName; @JsonProperty("clusterFraction") - private double clusterFraction; + private Double clusterFraction; /** * The reason to kill an AM. Note that this is for the entire session, not just for a query. * Once set, this can never be unset because you can only kill the session once. 
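To make the intended lifecycle of the new `WmPoolMetrics` class easier to follow, here is a sketch of the calls that `WorkloadManager.PoolState` drives, in order; the pool name and numbers are placeholders:

```java
// Lifecycle sketch for WmPoolMetrics (illustrative; real callers null-check the metrics object,
// and the metrics system is normally initialized elsewhere in HiveServer2).
import org.apache.hadoop.hive.ql.exec.tez.WmPoolMetrics;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;

public class WmPoolMetricsSketch {
  public static void main(String[] args) {
    MetricsSystem ms = DefaultMetricsSystem.instance();
    WmPoolMetrics metrics = WmPoolMetrics.create("etl", ms); // registers source "WmPoolMetrics.etl"

    metrics.setParallelQueries(4);   // query parallelism from the resource plan
    metrics.addQueuedQuery();        // a query arrives and waits in the pool queue
    metrics.moveQueuedToRunning();   // it is handed a session
    metrics.setExecutors(6);         // guaranteed executors currently allocated to the pool
    metrics.setMaxExecutors(8);      // executors the pool's cluster fraction translates to

    metrics.removeRunningQueries(1); // the query finishes or the session leaves the pool
    metrics.destroy();               // unregisters the source and drops the codahale gauges
  }
}
```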
@@ -174,7 +174,11 @@ void setClusterFraction(double fraction) { void clearWm() { this.poolName = null; - this.clusterFraction = 0f; + this.clusterFraction = null; + } + + public boolean hasClusterFraction() { + return this.clusterFraction != null; } public double getClusterFraction() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WorkloadManager.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WorkloadManager.java index f0e620c..65e3c82 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WorkloadManager.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/WorkloadManager.java @@ -23,11 +23,13 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import com.google.common.math.DoubleMath; import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.SettableFuture; import com.google.common.util.concurrent.ThreadFactoryBuilder; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -50,6 +52,7 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; + import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; @@ -62,6 +65,7 @@ import org.apache.hadoop.hive.ql.exec.tez.AmPluginNode.AmPluginInfo; import org.apache.hadoop.hive.ql.exec.tez.TezSessionState.HiveResources; import org.apache.hadoop.hive.ql.exec.tez.UserPoolMapping.MappingInput; +import org.apache.hadoop.hive.ql.exec.tez.WmEvent.EventType; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.session.KillQuery; import org.apache.hadoop.hive.ql.session.SessionState; @@ -70,6 +74,8 @@ import org.apache.hadoop.hive.ql.wm.Trigger; import org.apache.hadoop.hive.ql.wm.TriggerActionHandler; import org.apache.hadoop.hive.ql.wm.WmContext; +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hive.common.util.Ref; import org.apache.tez.dag.api.TezConfiguration; import org.codehaus.jackson.annotate.JsonAutoDetect; @@ -107,8 +113,9 @@ private final String yarnQueue; private final int amRegistryTimeoutMs; private final boolean allowAnyPool; + private final MetricsSystem metricsSystem; // Note: it's not clear that we need to track this - unlike PoolManager we don't have non-pool - // sessions, so the pool itself could internally track the sessions it gave out, since + // calling close on an unopened session is probably harmless.
private final IdentityHashMap openSessions = new IdentityHashMap<>(); @@ -216,6 +223,11 @@ public static WorkloadManager create(String yarnQueue, HiveConf conf, WMFullReso .setDaemon(true).setNameFormat("Workload management timeout thread").build()); allowAnyPool = HiveConf.getBoolVar(conf, ConfVars.HIVE_SERVER2_WM_ALLOW_ANY_POOL_VIA_JDBC); + if (HiveConf.getBoolVar(conf, ConfVars.HIVE_SERVER2_WM_POOL_METRICS)) { + metricsSystem = DefaultMetricsSystem.instance(); + } else { + metricsSystem = null; + } wmThread = new Thread(() -> runWmThread(), "Workload management master"); wmThread.setDaemon(true); @@ -721,14 +733,16 @@ private void processCurrentEvents(EventState e, WmThreadSyncWork syncWork) throw private void dumpPoolState(PoolState ps, List set) { StringBuilder sb = new StringBuilder(); - sb.append("POOL ").append(ps.fullName).append(": qp ").append(ps.queryParallelism).append(", %% ") - .append(ps.finalFraction).append(", sessions: ").append(ps.sessions.size()) - .append(", initializing: ").append(ps.initializingSessions.size()).append(", queued: ").append(ps.queue.size()); + sb.append("POOL ").append(ps.fullName).append(": qp ").append(ps.queryParallelism) + .append(", %% ").append(ps.finalFraction).append(", sessions: ").append(ps.sessions.size()) + .append(", initializing: ").append(ps.initializingSessions.size()).append(", queued: ") + .append(ps.queue.size()); set.add(sb.toString()); sb.setLength(0); for (WmTezSession session : ps.sessions) { - sb.append("RUNNING: ").append(session.getClusterFraction()).append(" (") - .append(session.getAllocationState()).append(") => ").append(session.getSessionId()); + double cf = session.hasClusterFraction() ? session.getClusterFraction() : 0; + sb.append("RUNNING: ").append(cf).append(" (") .append(session.getAllocationState()) + .append(") => ").append(session.getSessionId()); set.add(sb.toString()); sb.setLength(0); } @@ -755,7 +769,7 @@ private void handleMoveSessionOnMasterThread(final MoveSession moveSession, WmEvent moveEvent = new WmEvent(WmEvent.EventType.MOVE); // remove from src pool RemoveSessionResult rr = checkAndRemoveSessionFromItsPool( - moveSession.srcSession, poolsToRedistribute, true); + moveSession.srcSession, poolsToRedistribute, true, true); if (rr == RemoveSessionResult.OK) { // check if there is capacity in dest pool, if so move else kill the session if (capacityAvailable(destPoolName)) { @@ -859,7 +873,7 @@ private RemoveSessionResult handleReturnedInUseSessionOnMasterThread( reuseRequest.future.setException(new AssertionError("Invalid reuse attempt")); } session.setQueryId(null); - return checkAndRemoveSessionFromItsPool(session, poolsToRedistribute, isReturn); + return checkAndRemoveSessionFromItsPool(session, poolsToRedistribute, isReturn, true); } private void handeReopenRequestOnMasterThread(EventState e, WmTezSession session, @@ -876,7 +890,9 @@ private void handeReopenRequestOnMasterThread(EventState e, WmTezSession session // anything. Instead, we will try to give out an existing session from the pool, and restart // the problematic one in background. String poolName = session.getPoolName(); - RemoveSessionResult rr = checkAndRemoveSessionFromItsPool(session, poolsToRedistribute, false); + // Do not update metrics, we'd immediately add the session back if we are able to remove. + RemoveSessionResult rr = checkAndRemoveSessionFromItsPool( + session, poolsToRedistribute, false, false); switch (rr) { case OK: // If pool didn't exist, checkAndRemoveSessionFromItsPool wouldn't have returned OK. 
@@ -885,6 +901,7 @@ private void handeReopenRequestOnMasterThread(EventState e, WmTezSession session session.getWmContext(), session.extractHiveResources()); // We have just removed the session from the same pool, so don't check concurrency here. pool.initializingSessions.add(sw); + // Do not update metrics - see above. sw.start(); syncWork.toRestartInUse.add(session); return; @@ -920,10 +937,10 @@ private void handleUpdateErrorOnMasterThread(WmTezSession session, // This session is bad, so don't allow reuse; just convert it to normal get. reuseRequest.sessionToReuse = null; } - // TODO: we should communicate this to the user more explicitly (use kill query API, or - // add an option for bg kill checking to TezTask/monitor? + // We are assuming the update-error AM is bad and just try to kill it. - RemoveSessionResult rr = checkAndRemoveSessionFromItsPool(session, poolsToRedistribute, null); + RemoveSessionResult rr = checkAndRemoveSessionFromItsPool( + session, poolsToRedistribute, null, true); switch (rr) { case OK: case NOT_FOUND: @@ -989,7 +1006,7 @@ private void applyNewResourcePlanOnMasterThread( } PoolState state = oldPools == null ? null : oldPools.remove(fullName); if (state == null) { - state = new PoolState(fullName, qp, fraction, pool.getSchedulingPolicy()); + state = new PoolState(fullName, qp, fraction, pool.getSchedulingPolicy(), metricsSystem); } else { // This will also take care of the queries if query parallelism changed. state.update(qp, fraction, syncWork, e, pool.getSchedulingPolicy()); @@ -1001,6 +1018,12 @@ private void applyNewResourcePlanOnMasterThread( totalQueryParallelism += qp; } } + for (PoolState pool : pools.values()) { + if (pool.metrics != null) { + pool.metrics.setMaxExecutors( + allocationManager.translateAllocationToCpus(pool.finalFractionRemaining)); + } + } // TODO: in the current impl, triggers are added to RP. For tez, no pool triggers (mapping between trigger name and // pool name) will exist which means all triggers applies to tez. For LLAP, pool triggers has to exist for attaching // triggers to specific pools. @@ -1094,12 +1117,18 @@ private void queueGetRequestOnMasterThread( String oldPoolName = req.sessionToReuse.getPoolName(); oldPool = pools.get(oldPoolName); RemoveSessionResult rr = checkAndRemoveSessionFromItsPool( - req.sessionToReuse, poolsToRedistribute, true); + req.sessionToReuse, poolsToRedistribute, true, false); if (rr != RemoveSessionResult.OK) { + if (oldPool.metrics != null) { + oldPool.metrics.removeRunningQueries(1); + } // Abandon the reuse attempt. returnSessionOnFailedReuse(req, syncWork, null); req.sessionToReuse = null; } else if (pool.getTotalActiveSessions() + pool.queue.size() >= pool.queryParallelism) { + if (oldPool.metrics != null) { + oldPool.metrics.removeRunningQueries(1); + } // One cannot simply reuse the session if there are other queries waiting; to maintain // fairness, we'll try to take a query slot instantly, and if that fails we'll return // this session back to the pool and give the user a new session later. @@ -1113,6 +1142,7 @@ private void queueGetRequestOnMasterThread( req.sessionToReuse.setPoolName(poolName); req.sessionToReuse.setQueueName(yarnQueue); req.sessionToReuse.setQueryId(req.queryId); + // Do not update metrics - we didn't update on removal. pool.sessions.add(req.sessionToReuse); if (pool != oldPool) { poolsToRedistribute.add(poolName); @@ -1123,6 +1153,9 @@ private void queueGetRequestOnMasterThread( // Otherwise, queue the session and make sure we update this pool. 
pool.queue.addLast(req); + if (pool.metrics != null) { + pool.metrics.addQueuedQuery(); + } poolsToRedistribute.add(poolName); } @@ -1134,7 +1167,7 @@ private void processPoolChangesOnMasterThread( // 1. First, start the queries from the queue. int queriesToStart = Math.min(pool.queue.size(), - pool.queryParallelism - pool.getTotalActiveSessions()); + pool.queryParallelism - pool.getTotalActiveSessions()); if (queriesToStart > 0) { LOG.info("Starting {} queries in pool {}", queriesToStart, pool); @@ -1145,6 +1178,9 @@ private void processPoolChangesOnMasterThread( } for (int i = 0; i < queriesToStart; ++i) { GetRequest queueReq = pool.queue.pollFirst(); + if (pool.metrics != null) { + pool.metrics.moveQueuedToRunning(); + } assert queueReq.sessionToReuse == null; // Note that in theory, we are guaranteed to have a session waiting for us here, but // the expiration, failures, etc. may cause one to be missing pending restart. @@ -1170,7 +1206,14 @@ private void processPoolChangesOnMasterThread( // logic to be consistent between all the separate calls in one master thread processing round. // Note: If allocation manager does not have cluster state, it won't update anything. When the // cluster state changes, it will notify us, and we'd update the queries again. - allocationManager.updateSessionsAsync(totalAlloc, pool.sessions); + int cpusAllocated = allocationManager.updateSessionsAsync(totalAlloc, pool.sessions); + if (pool.metrics != null) { + pool.metrics.setExecutors(cpusAllocated); + if (cpusAllocated > 0) { + // Update max executors now that cluster info is definitely available. + pool.metrics.setMaxExecutors(allocationManager.translateAllocationToCpus(totalAlloc)); + } + } } private void returnSessionOnFailedReuse( @@ -1181,7 +1224,7 @@ private void returnSessionOnFailedReuse( session.setQueryId(null); if (poolsToRedistribute != null) { RemoveSessionResult rr = checkAndRemoveSessionFromItsPool( - session, poolsToRedistribute, true); + session, poolsToRedistribute, true, true); // The session cannot have been killed just now; this happens after all the kills in // the current iteration, so we would have cleared sessionToReuse when killing this. boolean isOk = (rr == RemoveSessionResult.OK); @@ -1217,8 +1260,8 @@ private void returnSessionOnFailedReuse( * thread (so we are dealing with an outdated request); null if the session should be * in WM but wasn't found in the requisite pool (internal error?). */ - private RemoveSessionResult checkAndRemoveSessionFromItsPool( - WmTezSession session, Set poolsToRedistribute, Boolean isSessionOk) { + private RemoveSessionResult checkAndRemoveSessionFromItsPool(WmTezSession session, + Set poolsToRedistribute, Boolean isSessionOk, boolean updateMetrics) { // It is possible for some request to be queued after a main thread has decided to kill this // session; on the next iteration, we'd be processing that request with an irrelevant session. 
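The wiring above feeds two gauges per pool: `setExecutors` receives the total actually distributed by `updateSessionsAsync`, while `setMaxExecutors` receives `translateAllocationToCpus(fraction)`. A tiny sketch of the latter computation, with an assumed executor count:

```java
// Sketch of what translateAllocationToCpus() computes for the numExecutorsMax gauge.
// The cluster size and pool fraction below are assumptions for illustration only.
public class PoolCapacitySketch {
  public static void main(String[] args) {
    int clusterExecutors = 16;    // what GuaranteedTasksAllocator.getExecutorCount(false) reports
    double poolFraction = 0.375;  // the pool's remaining cluster fraction from the resource plan

    // Same formula as GuaranteedTasksAllocator.translateAllocationToCpus(double):
    int maxExecutors = (int) Math.round(clusterExecutors * poolFraction);
    System.out.println("numExecutorsMax = " + maxExecutors); // 6
  }
}
```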
if (session.isIrrelevantForWm()) { @@ -1237,6 +1280,9 @@ private RemoveSessionResult checkAndRemoveSessionFromItsPool( PoolState pool = pools.get(poolName); session.clearWm(); if (pool != null && pool.sessions.remove(session)) { + if (updateMetrics && pool.metrics != null) { + pool.metrics.removeRunningQueries(1); + } return RemoveSessionResult.OK; } } @@ -1255,6 +1301,9 @@ private Boolean checkAndAddSessionToAnotherPool( PoolState destPool = pools.get(destPoolName); if (destPool != null && destPool.sessions.add(session)) { + if (destPool.metrics != null) { + destPool.metrics.addRunningQuery(); + } session.setPoolName(destPoolName); updateTriggers(session); poolsToRedistribute.add(destPoolName); @@ -1675,6 +1724,7 @@ Runnable getTriggerValidatorRunnable() { // Note: the list is expected to be a few items; if it's longer we may want an IHM. private final LinkedList sessions = new LinkedList<>(); private final LinkedList queue = new LinkedList<>(); + private final WmPoolMetrics metrics; private final String fullName; private double finalFraction; @@ -1684,8 +1734,11 @@ Runnable getTriggerValidatorRunnable() { private WMPoolSchedulingPolicy schedulingPolicy; public PoolState(String fullName, int queryParallelism, double fraction, - String schedulingPolicy) { + String schedulingPolicy, MetricsSystem ms) { this.fullName = fullName; + // TODO: this actually calls the metrics system and getMetrics - that may be expensive. + // For now it looks like it should be ok to do on WM thread. + this.metrics = ms == null ? null : WmPoolMetrics.create(fullName, ms); update(queryParallelism, fraction, null, null, schedulingPolicy); } @@ -1697,6 +1750,9 @@ public void update(int queryParallelism, double fraction, WmThreadSyncWork syncWork, EventState e, String schedulingPolicy) { this.finalFraction = this.finalFractionRemaining = fraction; this.queryParallelism = queryParallelism; + if (metrics != null) { + metrics.setParallelQueries(queryParallelism); + } try { this.schedulingPolicy = MetaStoreUtils.parseSchedulingPolicy(schedulingPolicy); } catch (IllegalArgumentException ex) { @@ -1716,6 +1772,10 @@ public void update(int queryParallelism, double fraction, // We will requeue, and not kill, the queries that are not running yet. // Insert them all before the get requests from this iteration. GetRequest req; + if (metrics != null) { + metrics.removeQueuedQueries(queue.size()); + } + while ((req = queue.pollLast()) != null) { e.getRequests.addFirst(req); } @@ -1727,6 +1787,10 @@ public void destroy(WmThreadSyncWork syncWork, // All the pending get requests should just be requeued elsewhere. // Note that we never queue session reuse so sessionToReuse would be null. globalQueue.addAll(0, queue); + if (metrics != null) { + metrics.removeQueuedQueries(queue.size()); + metrics.destroy(); + } queue.clear(); } @@ -1737,7 +1801,7 @@ public double updateAllocationPercentages() { if (totalSessions == 0) return 0; double allocation = finalFractionRemaining / totalSessions; for (WmTezSession session : sessions) { - session.setClusterFraction(allocation); + updateSessionAllocationWithEvent(session, allocation); } // Do not give out the capacity of the initializing sessions to the running ones; // we expect init to be fast. @@ -1746,7 +1810,7 @@ public double updateAllocationPercentages() { if (sessions.isEmpty()) return 0; boolean isFirst = true; for (WmTezSession session : sessions) { - session.setClusterFraction(isFirst ? finalFractionRemaining : 0); + updateSessionAllocationWithEvent(session, isFirst ? 
finalFractionRemaining : 0); isFirst = false; } return finalFractionRemaining; @@ -1755,6 +1819,19 @@ public double updateAllocationPercentages() { } } + private void updateSessionAllocationWithEvent(WmTezSession session, double allocation) { + WmEvent event = null; + WmContext ctx = session.getWmContext(); + if (ctx != null && session.hasClusterFraction() + && !DoubleMath.fuzzyEquals(session.getClusterFraction(), allocation, 0.0001f)) { + event = new WmEvent(EventType.UPDATE); + } + session.setClusterFraction(allocation); + if (event != null) { + event.endEvent(session); + } + } + public LinkedList getSessions() { return sessions; } @@ -1774,6 +1851,7 @@ public String toString() { private void extractAllSessionsToKill(String killReason, IdentityHashMap toReuse, WmThreadSyncWork syncWork) { + int totalCount = sessions.size() + initializingSessions.size(); for (WmTezSession sessionToKill : sessions) { resetRemovedSessionToKill(syncWork.toKillQuery, new KillQueryContext(sessionToKill, killReason), toReuse); @@ -1791,6 +1869,9 @@ private void extractAllSessionsToKill(String killReason, new KillQueryContext(sessionToKill, killReason), toReuse); } initializingSessions.clear(); + if (metrics != null) { + metrics.removeRunningQueries(totalCount); + } } public void setTriggers(final LinkedList triggers) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezProgressMonitor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezProgressMonitor.java index a14cdb6..b0c1659 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezProgressMonitor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezProgressMonitor.java @@ -94,6 +94,7 @@ if (progress != null) { // Map 1 .......... container SUCCEEDED 7 7 0 0 0 0 + // TODO: can we pass custom things thru the progress? 
results.add( Arrays.asList( getNameWithProgress(vertexName, progress.succeededTaskCount, progress.totalTaskCount), diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index 1828f0a..a9ebc90 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -1653,6 +1653,9 @@ public static boolean isRawFormat(Path baseOrDeltaDir, FileSystem fs) throws IOE //directory is empty or doesn't have any that could have been produced by load data return false; } + return isRawFormatFile(dataFile, fs); + } + public static boolean isRawFormatFile(Path dataFile, FileSystem fs) throws IOException { try { Reader reader = OrcFile.createReader(dataFile, OrcFile.readerOptions(fs.getConf())); /* diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 5ad4406..d76c8b6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -58,6 +58,8 @@ import javax.jdo.JDODataStoreException; import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Project; import org.apache.commons.io.FilenameUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileChecksum; @@ -1365,8 +1367,14 @@ public Table apply(org.apache.hadoop.hive.metastore.api.Table table) { HiveMaterializedViewsRegistry.get().getRewritingMaterializedView( dbName, materializedViewTable.getTableName()); if (materialization != null) { - RelOptHiveTable cachedMaterializedViewTable = - (RelOptHiveTable) materialization.tableRel.getTable(); + RelNode viewScan = materialization.tableRel; + RelOptHiveTable cachedMaterializedViewTable; + if (viewScan instanceof Project) { + // There is a Project on top (due to nullability) + cachedMaterializedViewTable = (RelOptHiveTable) viewScan.getInput(0).getTable(); + } else { + cachedMaterializedViewTable = (RelOptHiveTable) viewScan.getTable(); + } if (cachedMaterializedViewTable.getHiveTableMD().getCreateTime() == materializedViewTable.getCreateTime()) { // It is in the cache and up to date diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java index 3f73fd7..53dc8ec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java @@ -377,9 +377,8 @@ private static RelNode createMaterializedViewScan(HiveConf conf, Table viewTable List intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL); rowType = dtFactory.createStructType(druidColTypes, druidColNames); RelOptHiveTable optTable = new RelOptHiveTable(null, fullyQualifiedTabName, - rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<>(), - conf, new HashMap<>(), new HashMap<>(), new AtomicInteger()); - + rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<>(), + conf, new HashMap<>(), new HashMap<>(), new AtomicInteger()); DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false), dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals, null, null); @@ -390,8 +389,8 @@ private static RelNode createMaterializedViewScan(HiveConf conf, Table viewTable } else { // Build 
Hive Table Scan Rel RelOptHiveTable optTable = new RelOptHiveTable(null, fullyQualifiedTabName, - rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<>(), - conf, new HashMap<>(), new HashMap<>(), new AtomicInteger()); + rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<>(), + conf, new HashMap<>(), new HashMap<>(), new AtomicInteger()); tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, viewTable.getTableName(), null, false, false); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfPlannerContext.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfPlannerContext.java index b0f1a8d..b9b0e9e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfPlannerContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfPlannerContext.java @@ -20,11 +20,19 @@ public class HiveConfPlannerContext{ private boolean isCorrelatedColumns; + private boolean heuristicMaterializationStrategy; - public HiveConfPlannerContext(boolean isCorrelatedColumns) { + public HiveConfPlannerContext(boolean isCorrelatedColumns, boolean heuristicMaterializationStrategy) { this.isCorrelatedColumns = isCorrelatedColumns; + this.heuristicMaterializationStrategy = heuristicMaterializationStrategy; } - public boolean getIsCorrelatedColumns() { return isCorrelatedColumns;} + public boolean getIsCorrelatedColumns() { + return isCorrelatedColumns; + } + + public boolean isHeuristicMaterializationStrategy() { + return heuristicMaterializationStrategy; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java index efd8a35..e85a99e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite; +import org.apache.calcite.avatica.util.TimeUnitRange; import org.apache.calcite.plan.Context; import org.apache.calcite.plan.Contexts; import org.apache.calcite.plan.RelOptCluster; @@ -27,10 +28,21 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.server.CalciteServerStatement; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.tools.FrameworkConfig; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction; +import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction; +import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction; +import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumEmptyIsZeroAggFunction; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; + +import java.util.HashMap; +import java.util.Map; /** @@ -107,4 +119,38 @@ public RelBuilder empty() { return this.push(sort); } + public static SqlFunction getFloorSqlFunction(TimeUnitRange flag) { + switch (flag) { + case YEAR: + return HiveFloorDate.YEAR; + case QUARTER: + return HiveFloorDate.QUARTER; + case MONTH: + return HiveFloorDate.MONTH; + case DAY: + return HiveFloorDate.DAY; + case HOUR: 
+ return HiveFloorDate.HOUR; + case MINUTE: + return HiveFloorDate.MINUTE; + case SECOND: + return HiveFloorDate.SECOND; + } + return SqlStdOperatorTable.FLOOR; + } + + public static SqlAggFunction getRollup(SqlAggFunction aggregation) { + if (aggregation instanceof HiveSqlSumAggFunction + || aggregation instanceof HiveSqlMinMaxAggFunction + || aggregation instanceof HiveSqlSumEmptyIsZeroAggFunction) { + return aggregation; + } + if (aggregation instanceof HiveSqlCountAggFunction) { + HiveSqlCountAggFunction countAgg = (HiveSqlCountAggFunction) aggregation; + return new HiveSqlSumEmptyIsZeroAggFunction(countAgg.isDistinct(), countAgg.getReturnTypeInference(), + countAgg.getOperandTypeInference(), countAgg.getOperandTypeChecker()); + } + return null; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java index 88aedb6..fbf2202 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java @@ -18,13 +18,23 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.cost; +import com.google.common.collect.Multimap; import org.apache.calcite.adapter.druid.DruidQuery; +import org.apache.calcite.plan.Convention; import org.apache.calcite.plan.ConventionTraitDef; +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptCostImpl; import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.volcano.RelSubset; import org.apache.calcite.plan.volcano.VolcanoPlanner; import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.commons.math3.util.FastMath; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext; import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveDruidRules; /** @@ -37,9 +47,14 @@ public class HiveVolcanoPlanner extends VolcanoPlanner { private static final boolean ENABLE_COLLATION_TRAIT = true; + private final boolean isHeuristic; + private static final double FACTOR = 0.2d; + + /** Creates a HiveVolcanoPlanner. */ public HiveVolcanoPlanner(HivePlannerContext conf) { super(HiveCost.FACTORY, conf); + isHeuristic = conf.unwrap(HiveConfPlannerContext.class).isHeuristicMaterializationStrategy(); } public static RelOptPlanner createPlanner(HivePlannerContext conf) { @@ -72,4 +87,56 @@ public void registerClass(RelNode node) { } super.registerClass(node); } + + /** + * The method extends the logic of the super method to decrease + * the cost of the plan if it contains materialized views + * (heuristic). 
+ */ + public RelOptCost getCost(RelNode rel, RelMetadataQuery mq) { + assert rel != null : "pre-condition: rel != null"; + if (rel instanceof RelSubset) { + return getCost(((RelSubset) rel).getBest(), mq); + } + if (rel.getTraitSet().getTrait(ConventionTraitDef.INSTANCE) + == Convention.NONE) { + return costFactory.makeInfiniteCost(); + } + // We get the cost of the operator + RelOptCost cost = mq.getNonCumulativeCost(rel); + if (!costFactory.makeZeroCost().isLt(cost)) { + // cost must be positive, so nudge it + cost = costFactory.makeTinyCost(); + } + // If this operator has a materialized view below, + // we make its cost tiny and adjust the cost of its + // inputs + boolean usesMaterializedViews = false; + Multimap, RelNode> nodeTypes = + mq.getNodeTypes(rel); + for (RelNode scan : nodeTypes.get(TableScan.class)) { + if (((RelOptHiveTable) scan.getTable()).getHiveTableMD().isMaterializedView()) { + usesMaterializedViews = true; + break; + } + } + if (isHeuristic && usesMaterializedViews) { + cost = costFactory.makeTinyCost(); + for (RelNode input : rel.getInputs()) { + // If a child of this expression uses a materialized view, + // then we decrease its cost by a certain factor. This is + // useful for e.g. partial rewritings, where a part of plan + // does not use the materialization, but we still want to + // decrease its cost so it is chosen instead of the original + // plan + cost = cost.plus(getCost(input, mq).multiplyBy(FACTOR)); + } + } else { + // No materialized view or not heuristic approach, normal costing + for (RelNode input : rel.getInputs()) { + cost = cost.plus(getCost(input, mq)); + } + } + return cost; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java index 94a3bac..5fda6f9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java @@ -123,7 +123,7 @@ public RelNode copy(RelTraitSet traitSet, List inputs) { */ public HiveTableScan copy(RelDataType newRowtype) { return new HiveTableScan(getCluster(), getTraitSet(), ((RelOptHiveTable) table), this.tblAlias, this.concatQbIDAlias, - newRowtype, this.useQBIdInDigest, this.insideView); + newRowtype, this.useQBIdInDigest, this.insideView); } @Override public RelWriter explainTerms(RelWriter pw) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java index df9c180..fb9672a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java @@ -17,41 +17,214 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules.views; +import com.google.common.collect.ImmutableList; +import org.apache.calcite.avatica.util.TimeUnitRange; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepProgramBuilder; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import 
org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewProjectFilterRule; import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewOnlyFilterRule; import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewProjectJoinRule; import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewOnlyJoinRule; import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewProjectAggregateRule; import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewOnlyAggregateRule; +import org.apache.calcite.rel.rules.ProjectRemoveRule; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.tools.RelBuilderFactory; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Util; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; + +import java.util.List; +import java.util.Map; /** * Enable join and aggregate materialized view rewriting */ public class HiveMaterializedViewRule { + /** + * This PROGRAM will be executed when there is a partial rewriting + * (using union operator) to pull up the projection expressions + * on top of the input that executes the modified query. The goal + * of the program is to expose all available expressions below + * the root of the plan. 
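+ * It extracts the plan from its RelSubset, inserts identity Project
+ * operators on top of the TableScan nodes, pulls projections up
+ * through Filter and Join operators (merging and removing redundant
+ * Project operators along the way), and finally adds a Project on
+ * top of the root if the root is a Join.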
+ */ + private static final HepProgram PROGRAM = new HepProgramBuilder() + .addRuleInstance(HiveExtractRelNodeRule.INSTANCE) + .addRuleInstance(HiveTableScanProjectInsert.INSTANCE) + .addRuleCollection( + ImmutableList.of( + HiveFilterProjectTransposeRule.INSTANCE, + HiveJoinProjectTransposeRule.BOTH_PROJECT, + HiveJoinProjectTransposeRule.LEFT_PROJECT, + HiveJoinProjectTransposeRule.RIGHT_PROJECT, + HiveProjectMergeRule.INSTANCE)) + .addRuleInstance(ProjectRemoveRule.INSTANCE) + .addRuleInstance(HiveRootJoinProjectInsert.INSTANCE) + .build(); + public static final MaterializedViewProjectFilterRule INSTANCE_PROJECT_FILTER = - new MaterializedViewProjectFilterRule(HiveRelFactories.HIVE_BUILDER, true, - null, false); + new MaterializedViewProjectFilterRule(HiveRelFactories.HIVE_BUILDER, + true, PROGRAM, false); public static final MaterializedViewOnlyFilterRule INSTANCE_FILTER = - new MaterializedViewOnlyFilterRule(HiveRelFactories.HIVE_BUILDER, true, - null, false); + new MaterializedViewOnlyFilterRule(HiveRelFactories.HIVE_BUILDER, + true, PROGRAM, false); public static final MaterializedViewProjectJoinRule INSTANCE_PROJECT_JOIN = - new MaterializedViewProjectJoinRule(HiveRelFactories.HIVE_BUILDER, true, - null, false); + new MaterializedViewProjectJoinRule(HiveRelFactories.HIVE_BUILDER, + true, PROGRAM, false); public static final MaterializedViewOnlyJoinRule INSTANCE_JOIN = - new MaterializedViewOnlyJoinRule(HiveRelFactories.HIVE_BUILDER, true, - null, false); + new MaterializedViewOnlyJoinRule(HiveRelFactories.HIVE_BUILDER, + true, PROGRAM, false); + + public static final HiveMaterializedViewProjectAggregateRule INSTANCE_PROJECT_AGGREGATE = + new HiveMaterializedViewProjectAggregateRule(HiveRelFactories.HIVE_BUILDER, + true, PROGRAM); + + public static final HiveMaterializedViewOnlyAggregateRule INSTANCE_AGGREGATE = + new HiveMaterializedViewOnlyAggregateRule(HiveRelFactories.HIVE_BUILDER, + true, PROGRAM); + + + protected static class HiveMaterializedViewProjectAggregateRule extends MaterializedViewProjectAggregateRule { + public HiveMaterializedViewProjectAggregateRule( + RelBuilderFactory relBuilderFactory, boolean generateUnionRewriting, HepProgram unionRewritingPullProgram) { + super(relBuilderFactory, generateUnionRewriting, unionRewritingPullProgram); + } + + @Override + protected SqlFunction getFloorSqlFunction(TimeUnitRange flag) { + return HiveRelBuilder.getFloorSqlFunction(flag); + } + + @Override + public SqlAggFunction getRollup(SqlAggFunction aggregation) { + return HiveRelBuilder.getRollup(aggregation); + } + } + + protected static class HiveMaterializedViewOnlyAggregateRule extends MaterializedViewOnlyAggregateRule { + public HiveMaterializedViewOnlyAggregateRule( + RelBuilderFactory relBuilderFactory, boolean generateUnionRewriting, HepProgram unionRewritingPullProgram) { + super(relBuilderFactory, generateUnionRewriting, unionRewritingPullProgram); + } + + @Override + protected SqlFunction getFloorSqlFunction(TimeUnitRange flag) { + return HiveRelBuilder.getFloorSqlFunction(flag); + } + + @Override + public SqlAggFunction getRollup(SqlAggFunction aggregation) { + return HiveRelBuilder.getRollup(aggregation); + } + } + + /** + * This rule is used within the PROGRAM that rewrites the query for + * partial rewritings. Its goal is to extract the RelNode from the + * RelSubset node so the rest of the rules in the PROGRAM can be + * applied correctly. 
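+ * The best plan registered for the RelSubset is used when it is
+ * available; otherwise the rule falls back to the original plan.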
+ */ + private static class HiveExtractRelNodeRule extends RelOptRule { + + private static final HiveExtractRelNodeRule INSTANCE = + new HiveExtractRelNodeRule(); + + private HiveExtractRelNodeRule() { + super(operand(RelSubset.class, any())); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final RelSubset rel = call.rel(0); + call.transformTo(Util.first(rel.getBest(), rel.getOriginal())); + } + } + + /** + * This rule inserts an identity Project operator on top of a TableScan. + * The rule is useful to pull-up the projection expressions during partial + * rewriting using Union operator, as we would like to have all those + * expressions available at the top of the input to insert Filter conditions + * if needed. + */ + private static class HiveTableScanProjectInsert extends RelOptRule { + + private static final HiveTableScanProjectInsert INSTANCE = + new HiveTableScanProjectInsert(); + + private HiveTableScanProjectInsert() { + super(operand(Filter.class, operand(TableScan.class, any())), + HiveRelFactories.HIVE_BUILDER, "HiveTableScanProjectInsert"); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final Filter fil = call.rel(0); + final TableScan rel = call.rel(1); + // Add identity + RelBuilder relBuilder = call.builder(); + relBuilder.push(rel); + List identityFields = relBuilder.fields( + ImmutableBitSet.range(0, rel.getRowType().getFieldCount()).asList()); + RelNode newRel = relBuilder + .project(identityFields, ImmutableList.of(), true) + .build(); + call.transformTo(fil.copy(fil.getTraitSet(), ImmutableList.of(newRel))); + } + + } + + /** + * This rule adds a Project operator on top of the root operator if it is a join. + * This is important to meet the requirements set by the rewriting rule with + * respect to the plan returned by the input program. + */ + private static class HiveRootJoinProjectInsert extends RelOptRule { + + private static final HiveRootJoinProjectInsert INSTANCE = + new HiveRootJoinProjectInsert(); - public static final MaterializedViewProjectAggregateRule INSTANCE_PROJECT_AGGREGATE = - new MaterializedViewProjectAggregateRule(HiveRelFactories.HIVE_BUILDER, true, - null); + private HiveRootJoinProjectInsert() { + super(operand(Join.class, any()), + HiveRelFactories.HIVE_BUILDER, "HiveRootJoinProjectInsert"); + } - public static final MaterializedViewOnlyAggregateRule INSTANCE_AGGREGATE = - new MaterializedViewOnlyAggregateRule(HiveRelFactories.HIVE_BUILDER, true, - null); + @Override + public void onMatch(RelOptRuleCall call) { + final Join join = call.rel(0); + final HepRelVertex root = (HepRelVertex) call.getPlanner().getRoot(); + if (root.getCurrentRel() != join) { + // Bail out + return; + } + // The join is the root, but we should always end up with a Project operator + // on top. We will add it. 
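+ // The added Project is an identity projection over all the fields
+ // of the join; it is created with force set to true so that it is
+ // not removed as a trivial projection.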
+ RelBuilder relBuilder = call.builder(); + relBuilder.push(join); + List identityFields = relBuilder.fields( + ImmutableBitSet.range(0, join.getRowType().getFieldCount()).asList()); + relBuilder.project(identityFields, ImmutableList.of(), true); + call.transformTo(relBuilder.build()); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java index 4dc48f4..0b1fe74 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -71,7 +71,6 @@ import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; -import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.collect.Maps; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 612deb8..41de17f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -44,18 +44,14 @@ import java.util.concurrent.atomic.AtomicInteger; import com.google.common.collect.Iterables; -import com.google.common.collect.Maps; import org.antlr.runtime.ClassicToken; import org.antlr.runtime.CommonToken; import org.antlr.runtime.tree.Tree; import org.antlr.runtime.tree.TreeVisitor; import org.antlr.runtime.tree.TreeVisitorAction; -import org.apache.calcite.adapter.druid.DirectOperatorConversion; import org.apache.calcite.adapter.druid.DruidQuery; import org.apache.calcite.adapter.druid.DruidSchema; import org.apache.calcite.adapter.druid.DruidTable; -import org.apache.calcite.adapter.druid.ExtractOperatorConversion; -import org.apache.calcite.adapter.druid.FloorOperatorConversion; import org.apache.calcite.config.CalciteConnectionConfig; import org.apache.calcite.config.CalciteConnectionConfigImpl; import org.apache.calcite.config.CalciteConnectionProperty; @@ -110,13 +106,11 @@ import org.apache.calcite.sql.SqlAggFunction; import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlExplainLevel; -import org.apache.calcite.sql.SqlFunction; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlLiteral; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.SqlWindow; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.ArraySqlType; import org.apache.calcite.sql.type.SqlTypeName; @@ -168,11 +162,8 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveConcat; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID; import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; @@ -366,15 +357,18 @@ private static RelOptPlanner createPlanner( HiveRulesRegistry registry = new HiveRulesRegistry(); Properties calciteConfigProperties = new Properties(); calciteConfigProperties.setProperty( - CalciteConnectionProperty.TIME_ZONE.camelName(), - conf.getLocalTimeZone().getId()); + CalciteConnectionProperty.TIME_ZONE.camelName(), + conf.getLocalTimeZone().getId()); calciteConfigProperties.setProperty( - CalciteConnectionProperty.MATERIALIZATIONS_ENABLED.camelName(), - Boolean.FALSE.toString()); + CalciteConnectionProperty.MATERIALIZATIONS_ENABLED.camelName(), + Boolean.FALSE.toString()); CalciteConnectionConfig calciteConfig = new CalciteConnectionConfigImpl(calciteConfigProperties); boolean isCorrelatedColumns = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_CORRELATED_MULTI_KEY_JOINS); + boolean heuristicMaterializationStrategy = HiveConf.getVar(conf, + HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_SELECTION_STRATEGY).equals("heuristic"); HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, calciteConfig, - corrScalarRexSQWithAgg, scalarAggNoGbyNoWin, new HiveConfPlannerContext(isCorrelatedColumns)); + corrScalarRexSQWithAgg, scalarAggNoGbyNoWin, + new HiveConfPlannerContext(isCorrelatedColumns, heuristicMaterializationStrategy)); return HiveVolcanoPlanner.createPlanner(confContext); } @@ -1487,11 +1481,21 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan); LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan)); + // 2. Apply pre-join order optimizations calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, mdProvider.getMetadataProvider(), executorProvider); - // 3. Apply join order optimizations: reordering MST algorithm + // 3. Materialized view based rewriting + // We disable it for CTAS and MV creation queries (trying to avoid any problem + // due to data freshness) + if (conf.getBoolVar(ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING) && + !getQB().isMaterializedView() && !ctx.isLoadingMaterializedView() && !getQB().isCTAS()) { + calcitePreCboPlan = applyMaterializedViewRewriting(planner, + calcitePreCboPlan, mdProvider.getMetadataProvider(), executorProvider); + } + + // 4. Apply join order optimizations: reordering MST algorithm // If join optimizations failed because of missing stats, we continue with // the rest of optimizations if (profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)) { @@ -1538,100 +1542,13 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu disableSemJoinReordering = false; } - // 4.1. Run other optimizations that do not need stats + // 5. 
Run other optimizations that do not need stats perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, - HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, HiveUnionMergeRule.INSTANCE, - HiveProjectMergeRule.INSTANCE_NO_FORCE, HiveJoinCommuteRule.INSTANCE); + HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, HiveUnionMergeRule.INSTANCE, + HiveAggregateProjectMergeRule.INSTANCE, HiveProjectMergeRule.INSTANCE_NO_FORCE, HiveJoinCommuteRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats 1"); - // 5. Materialized view based rewriting - // We disable it for CTAS and MV creation queries (trying to avoid any problem - // due to data freshness) - if (conf.getBoolVar(ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING) && - !getQB().isMaterializedView() && !ctx.isLoadingMaterializedView() && !getQB().isCTAS()) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - // Use Calcite cost model for view rewriting - RelMetadataProvider calciteMdProvider = DefaultRelMetadataProvider.INSTANCE; - RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(calciteMdProvider)); - planner.registerMetadataProviders(Lists.newArrayList(calciteMdProvider)); - // Add views to planner - List materializations = new ArrayList<>(); - try { - materializations = Hive.get().getValidMaterializedViews(rewrittenRebuild); - // We need to use the current cluster for the scan operator on views, - // otherwise the planner will throw an Exception (different planners) - materializations = Lists.transform(materializations, - new Function() { - @Override - public RelOptMaterialization apply(RelOptMaterialization materialization) { - final RelNode viewScan = materialization.tableRel; - final RelNode newViewScan; - if (viewScan instanceof Project) { - // There is a Project on top (due to nullability) - final Project pq = (Project) viewScan; - newViewScan = HiveProject.create(optCluster, copyNodeScan(pq.getInput()), - pq.getChildExps(), pq.getRowType(), Collections. emptyList()); - } else { - newViewScan = copyNodeScan(viewScan); - } - return new RelOptMaterialization(newViewScan, materialization.queryRel, null, - materialization.qualifiedTableName); - } - - private RelNode copyNodeScan(RelNode scan) { - final RelNode newScan; - if (scan instanceof DruidQuery) { - final DruidQuery dq = (DruidQuery) scan; - // Ideally we should use HiveRelNode convention. However, since Volcano planner - // throws in that case because DruidQuery does not implement the interface, - // we set it as Bindable. Currently, we do not use convention in Hive, hence that - // should be fine. - // TODO: If we want to make use of convention (e.g., while directly generating operator - // tree instead of AST), this should be changed. 
- newScan = DruidQuery.create(optCluster, optCluster.traitSetOf(BindableConvention.INSTANCE), - scan.getTable(), dq.getDruidTable(), - ImmutableList.of(dq.getTableScan())); - } else { - newScan = new HiveTableScan(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION), - (RelOptHiveTable) scan.getTable(), scan.getTable().getQualifiedName().get(0), - null, false, false); - } - return newScan; - } - } - ); - } catch (HiveException e) { - LOG.warn("Exception loading materialized views", e); - } - if (!materializations.isEmpty()) { - for (RelOptMaterialization materialization : materializations) { - planner.addMaterialization(materialization); - } - // Add view-based rewriting rules to planner - planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_FILTER); - planner.addRule(HiveMaterializedViewRule.INSTANCE_FILTER); - planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_JOIN); - planner.addRule(HiveMaterializedViewRule.INSTANCE_JOIN); - planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE); - planner.addRule(HiveMaterializedViewRule.INSTANCE_AGGREGATE); - // Optimize plan - planner.setRoot(calciteOptimizedPlan); - calciteOptimizedPlan = planner.findBestExp(); - // Remove view-based rewriting rules from planner - planner.clear(); - } - // Restore default cost model - RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider())); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: View-based rewriting"); - } - - // 4.2. Run other optimizations that do not need stats - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, - HepMatchOrder.BOTTOM_UP, HiveAggregateProjectMergeRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats 2"); - // 6. Run aggregate-join transpose (cost based) // If it failed because of missing stats, we continue with // the rest of optimizations @@ -1919,6 +1836,97 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv return basePlan; } + private RelNode applyMaterializedViewRewriting(RelOptPlanner planner, RelNode basePlan, + RelMetadataProvider mdProvider, RexExecutor executorProvider) { + final RelOptCluster optCluster = basePlan.getCluster(); + final PerfLogger perfLogger = SessionState.getPerfLogger(); + + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + final RelNode calcitePreMVRewritingPlan = basePlan; + + // Add views to planner + List materializations = new ArrayList<>(); + try { + materializations = Hive.get().getValidMaterializedViews(rewrittenRebuild); + // We need to use the current cluster for the scan operator on views, + // otherwise the planner will throw an Exception (different planners) + materializations = Lists.transform(materializations, + new Function() { + @Override + public RelOptMaterialization apply(RelOptMaterialization materialization) { + final RelNode viewScan = materialization.tableRel; + final RelNode newViewScan; + if (viewScan instanceof Project) { + // There is a Project on top (due to nullability) + final Project pq = (Project) viewScan; + newViewScan = HiveProject.create(optCluster, copyNodeScan(pq.getInput()), + pq.getChildExps(), pq.getRowType(), Collections. 
emptyList()); + } else { + newViewScan = copyNodeScan(viewScan); + } + return new RelOptMaterialization(newViewScan, materialization.queryRel, null, + materialization.qualifiedTableName); + } + + private RelNode copyNodeScan(RelNode scan) { + final RelNode newScan; + if (scan instanceof DruidQuery) { + final DruidQuery dq = (DruidQuery) scan; + // Ideally we should use HiveRelNode convention. However, since Volcano planner + // throws in that case because DruidQuery does not implement the interface, + // we set it as Bindable. Currently, we do not use convention in Hive, hence that + // should be fine. + // TODO: If we want to make use of convention (e.g., while directly generating operator + // tree instead of AST), this should be changed. + newScan = DruidQuery.create(optCluster, optCluster.traitSetOf(BindableConvention.INSTANCE), + scan.getTable(), dq.getDruidTable(), + ImmutableList.of(dq.getTableScan())); + } else { + newScan = new HiveTableScan(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION), + (RelOptHiveTable) scan.getTable(), scan.getTable().getQualifiedName().get(0), + null, false, false); + } + return newScan; + } + } + ); + } catch (HiveException e) { + LOG.warn("Exception loading materialized views", e); + } + if (!materializations.isEmpty()) { + // Use Calcite cost model for view rewriting + optCluster.invalidateMetadataQuery(); + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE)); + + // Add materializations to planner + for (RelOptMaterialization materialization : materializations) { + planner.addMaterialization(materialization); + } + // Add view-based rewriting rules to planner + planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_FILTER); + planner.addRule(HiveMaterializedViewRule.INSTANCE_FILTER); + planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_JOIN); + planner.addRule(HiveMaterializedViewRule.INSTANCE_JOIN); + planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE); + planner.addRule(HiveMaterializedViewRule.INSTANCE_AGGREGATE); + // Optimize plan + planner.setRoot(basePlan); + basePlan = planner.findBestExp(); + // Remove view-based rewriting rules from planner + planner.clear(); + + // Restore default cost model + optCluster.invalidateMetadataQuery(); + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider)); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: View-based rewriting"); + if (calcitePreMVRewritingPlan != basePlan) { + // Now we trigger some needed optimization rules again + basePlan = applyPreJoinOrderingTransforms(basePlan, mdProvider, executorProvider); + } + } + return basePlan; + } + /** * Run the HEP Planner with the given rule set. 
* @@ -2508,17 +2516,16 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc List intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL); rowType = dtFactory.createStructType(druidColTypes, druidColNames); DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false), - dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, - intervals, null, null); + dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, + intervals, null, null); RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName, - rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, - partitionCache, colStatsCache, noColsMissingStats); + rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, + partitionCache, colStatsCache, noColsMissingStats); final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - optTable, null == tableAlias ? tabMetaData.getTableName() : tableAlias, - getAliasId(tableAlias, qb), HiveConf.getBoolVar(conf, - HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP), qb.isInsideView() - || qb.getAliasInsideView().contains(tableAlias.toLowerCase())); - // Default Druid Standard + optTable, null == tableAlias ? tabMetaData.getTableName() : tableAlias, + getAliasId(tableAlias, qb), HiveConf.getBoolVar(conf, + HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP), qb.isInsideView() + || qb.getAliasInsideView().contains(tableAlias.toLowerCase())); tableRel = DruidQuery.create(cluster, cluster.traitSetOf(BindableConvention.INSTANCE), optTable, druidTable, ImmutableList.of(scan), DruidSqlOperatorConverter.getDefaultMap()); } else { @@ -2533,8 +2540,8 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc fullyQualifiedTabName = tabMetaData.getTableName(); } RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName, - rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, - partitionCache, colStatsCache, noColsMissingStats); + rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, + partitionCache, colStatsCache, noColsMissingStats); // Build Hive Table Scan Rel tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, null == tableAlias ? 
tabMetaData.getTableName() : tableAlias, diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 5b9ab3a..74c271d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -1255,6 +1255,9 @@ private void analyzeAlterPool(ASTNode ast) throws SemanticException { addServiceOutput(); } if (poolChanges != null) { + if (!poolChanges.isSetPoolPath()) { + poolChanges.setPoolPath(poolPath); + } rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), new CreateOrAlterWMPoolDesc(poolChanges, poolPath, true)))); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index d5aace0..e49089b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -150,7 +150,8 @@ private URI initializeFromURI(String fromPath, boolean isLocal) throws IOExcepti } try { - srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI)); + FileSystem fileSystem = FileSystem.get(fromURI, conf); + srcs = matchFilesOrDir(fileSystem, new Path(fromURI)); if (srcs == null || srcs.length == 0) { throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, "No files matching path " + fromURI)); @@ -162,6 +163,7 @@ private URI initializeFromURI(String fromPath, boolean isLocal) throws IOExcepti "source contains directory: " + oneSrc.getPath().toString())); } } + validateAcidFiles(table, srcs, fileSystem); // Do another loop if table is bucketed List bucketCols = table.getBucketCols(); if (bucketCols != null && !bucketCols.isEmpty()) { @@ -198,12 +200,24 @@ private URI initializeFromURI(String fromPath, boolean isLocal) throws IOExcepti if (bucketArray[bucketId]) { throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg( "Multiple files for same bucket : " + bucketId - + ". Only 1 file per bucket allowed in single load command. To load multiple files for same bucket, use multiple statements for table " + + ". Only 1 file per bucket allowed in single load command. To load " + + "multiple files for same bucket, use multiple statements for table " + table.getFullyQualifiedName())); } bucketArray[bucketId] = true; } } + else { + /** + * for loading into un-bucketed acid table, files can be named arbitrarily but they will + * be renamed during load. + * {@link Hive#mvFile(HiveConf, FileSystem, Path, FileSystem, Path, boolean, boolean, + * boolean, int)} + * and + * {@link Hive#copyFiles(HiveConf, FileSystem, FileStatus[], FileSystem, Path, boolean, + * boolean, List, boolean)} + */ + } } catch (IOException e) { // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils @@ -213,6 +227,28 @@ private URI initializeFromURI(String fromPath, boolean isLocal) throws IOExcepti return Lists.newArrayList(srcs); } + /** + * Safety check to make sure a file take from one acid table is not added into another acid table + * since the ROW__IDs embedded as part a write to one table won't make sense in different + * table/cluster. 
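+ * Each source file is checked with AcidUtils.MetaDataFile.isRawFormatFile();
+ * files already written in acid format are rejected with
+ * ErrorMsg.LOAD_DATA_ACID_FILE.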
+ */ + private static void validateAcidFiles(Table table, FileStatus[] srcs, FileSystem fs) + throws SemanticException { + if(!AcidUtils.isFullAcidTable(table)) { + return; + } + try { + for (FileStatus oneSrc : srcs) { + if (!AcidUtils.MetaDataFile.isRawFormatFile(oneSrc.getPath(), fs)) { + throw new SemanticException(ErrorMsg.LOAD_DATA_ACID_FILE, oneSrc.getPath().toString()); + } + } + } + catch(IOException ex) { + throw new SemanticException(ex); + } + } + @Override public void analyzeInternal(ASTNode ast) throws SemanticException { boolean isLocal = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java index f2d3d33..3f82d16 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java @@ -73,7 +73,7 @@ public String getName() { return name; } public static final List nonNativeTableAllowedTypes = - ImmutableList.of(ADDPROPS, DROPPROPS); + ImmutableList.of(ADDPROPS, DROPPROPS, ADDCOLS); } public static enum ProtectModeType { diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java index 8a01de3..3710311 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java @@ -19,6 +19,7 @@ import org.apache.commons.io.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.junit.Assert; import org.junit.Rule; @@ -453,9 +454,30 @@ public void testAbort() throws Exception { * which will currently make the query non-vectorizable. This means we can't check the file name * for vectorized version of the test. */ - private void checkResult(String[][] expectedResult, String query, boolean isVectorized, String msg) throws Exception{ + private void checkResult(String[][] expectedResult, String query, boolean isVectorized, + String msg) throws Exception{ List rs = runStatementOnDriver(query); checkExpected(rs, expectedResult, msg + (isVectorized ? 
" vect" : ""), LOG, !isVectorized); assertVectorized(isVectorized, query); } + @Test + public void testLoadAcidFile() throws Exception { + MetastoreConf.setBoolVar(hiveConf, MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID, true); + runStatementOnDriver("drop table if exists T"); + runStatementOnDriver("drop table if exists T2"); + runStatementOnDriver( + "create table T (a int, b int) stored as orc"); + //This is just a simple way to generate test data + runStatementOnDriver("create table T2(a int, b int) stored as orc"); + runStatementOnDriver("insert into T values(1,2)"); + List rs = runStatementOnDriver("select INPUT__FILE__NAME from T"); + Assert.assertEquals(1, rs.size()); + Assert.assertTrue("Unexpcted file name", rs.get(0) + .endsWith("t/delta_0000001_0000001_0000/bucket_00000")); + //T2 is an acid table so this should fail + CommandProcessorResponse cpr = runStatementOnDriverNegative( + "load data local inpath '" + rs.get(0) + "' into table T2"); + Assert.assertEquals("Unexpected error code", + ErrorMsg.LOAD_DATA_ACID_FILE.getErrorCode(), cpr.getErrorCode()); + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java index 20a5947..61bf84c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java @@ -107,8 +107,9 @@ public void stop() { } @Override - public void updateSessionsAsync(Double totalMaxAlloc, List sessions) { + public int updateSessionsAsync(Double totalMaxAlloc, List sessions) { isCalled = true; + return 0; } @Override @@ -123,6 +124,11 @@ void assertWasCalledAndReset() { @Override public void setClusterChangedCallback(Runnable clusterChangedCallback) { } + + @Override + public int translateAllocationToCpus(double allocation) { + return 0; + } } public static WMResourcePlan plan() { @@ -308,7 +314,7 @@ public void testReopen() throws Exception { assertNotSame(session, session2); wm.addTestEvent().get(); assertEquals(session2.toString(), 1.0, session2.getClusterFraction(), EPSILON); - assertEquals(0.0, session.getClusterFraction(), EPSILON); + assertFalse(session.hasClusterFraction()); qam.assertWasCalledAndReset(); } @@ -329,14 +335,14 @@ public void testDestroyAndReturn() throws Exception { assertNotSame(session, session2); session.destroy(); // Destroy before returning to the pool. assertEquals(1.0, session2.getClusterFraction(), EPSILON); - assertEquals(0.0, session.getClusterFraction(), EPSILON); + assertFalse(session.hasClusterFraction()); qam.assertWasCalledAndReset(); // We never lose pool session, so we should still be able to get. 
session = (WmTezSession) wm.getSession(null, mappingInput("user"), conf); session.returnToSessionManager(); assertEquals(1.0, session2.getClusterFraction(), EPSILON); - assertEquals(0.0, session.getClusterFraction(), EPSILON); + assertFalse(session.hasClusterFraction()); qam.assertWasCalledAndReset(); } @@ -1089,7 +1095,7 @@ public void testMoveSessionsMultiPool() throws Exception { assertEquals(0, allSessionProviders.get("B.x").getSessions().size()); assertEquals(0, allSessionProviders.get("B.y").getSessions().size()); assertEquals(0, allSessionProviders.get("C").getSessions().size()); - assertEquals(0.0f, sessionA1.getClusterFraction(), EPSILON); + assertFalse(sessionA1.hasClusterFraction()); assertFalse(allSessionProviders.get("A").getSessions().contains(sessionA1)); } @@ -1207,7 +1213,7 @@ private SampleTezSessionState validatePoolAfterCleanup( assertNotNull(theOnlySession); theOnlySession.setWaitForAmRegistryFuture(null); assertNull(oldSession.getPoolName()); - assertEquals(0f, oldSession.getClusterFraction(), EPSILON); + assertFalse(oldSession.hasClusterFraction()); pool.returnSession(theOnlySession); // Make sure we can actually get a session still - parallelism/etc. should not be affected. WmTezSession result = (WmTezSession) wm.getSession(null, mappingInput("A"), conf); @@ -1219,7 +1225,7 @@ private SampleTezSessionState validatePoolAfterCleanup( private void assertKilledByWm(WmTezSession session) { assertNull(session.getPoolName()); - assertEquals(0f, session.getClusterFraction(), EPSILON); + assertFalse(session.hasClusterFraction()); assertTrue(session.isIrrelevantForWm()); } diff --git ql/src/test/queries/clientpositive/druidmini_test_alter.q ql/src/test/queries/clientpositive/druidmini_test_alter.q new file mode 100644 index 0000000..15ae952 --- /dev/null +++ ql/src/test/queries/clientpositive/druidmini_test_alter.q @@ -0,0 +1,52 @@ +CREATE TABLE druid_alltypesorc +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS + SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`, +cstring1, +cdouble, +cfloat, +ctinyint, +csmallint, +cint, +cbigint, +cboolean1 +FROM alltypesorc where ctimestamp2 IS NOT NULL; + +DESCRIBE druid_alltypesorc; + +DESCRIBE extended druid_alltypesorc; + +SELECT COUNT(*) FROM druid_alltypesorc; + +ALTER TABLE druid_alltypesorc ADD COLUMNS (cstring2 string, cboolean2 boolean, cint2 int); + +DESCRIBE druid_alltypesorc; + +DESCRIBE extended druid_alltypesorc; + +SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL; + +INSERT INTO TABLE druid_alltypesorc + SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, +cstring1, +cdouble, +cfloat, +ctinyint, +csmallint, +cint, +cbigint, +cboolean1, +cstring2, +cboolean2, +cint +FROM alltypesorc where ctimestamp1 IS NOT NULL; + + +SELECT COUNT(*) FROM druid_alltypesorc; + +SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NULL; + +SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL; + +DROP TABLE druid_alltypesorc; diff --git ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q new file mode 100644 index 0000000..fa60475 --- /dev/null +++ ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q @@ -0,0 +1,183 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set 
hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250); +analyze table emps compute statistics for columns; + +create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); +analyze table depts compute statistics for columns; + +create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into dependents values (10, 'Michael'), (10, 'Jane'); +analyze table dependents compute statistics for columns; + +create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into locations values (10, 'San Francisco'), (10, 'San Diego'); +analyze table locations compute statistics for columns; + +alter table emps add constraint pk1 primary key (empid) disable novalidate rely; +alter table depts add constraint pk2 primary key (deptno) disable novalidate rely; +alter table dependents add constraint pk3 primary key (empid) disable novalidate rely; +alter table locations add constraint pk4 primary key (locationid) disable novalidate rely; + +alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely; +alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely; + +-- EXAMPLE 1 +create materialized view mv1 enable rewrite as +select * from emps where empid < 150; +analyze table mv1 compute statistics for columns; + +explain +select * +from (select * from emps where empid < 120) t +join depts using (deptno); + +select * +from (select * from emps where empid < 120) t +join depts using (deptno); + +drop materialized view mv1; + +-- EXAMPLE 2 +create materialized view mv1 enable rewrite as +select deptno, name, salary, commission +from emps; +analyze table mv1 compute statistics for columns; + +explain +select emps.name, emps.salary, emps.commission +from emps +join depts using (deptno); + +select emps.name, emps.salary, emps.commission +from emps +join depts using (deptno); + +drop materialized view mv1; + +-- EXAMPLE 3 +create materialized view mv1 enable rewrite as +select empid deptno from emps +join depts using (deptno); +analyze table mv1 compute statistics for columns; + +explain +select empid deptno from emps +join depts using (deptno) where empid = 1; + +select empid deptno from emps +join depts using (deptno) where empid = 1; + +drop materialized view mv1; + +-- EXAMPLE 4 +create materialized view mv1 enable rewrite as +select * from emps where empid < 200; +analyze table mv1 compute statistics for columns; + +explain +select * from emps where empid > 120 +union all select * from emps where empid < 150; + +select * from emps where empid > 120 +union all select * from emps where empid < 150; + +drop materialized view mv1; + +-- EXAMPLE 5 - NO MV, ALREADY UNIQUE +create materialized view mv1 enable rewrite as +select empid, deptno from emps group by empid, deptno; +analyze table mv1 compute statistics for 
columns; + +explain +select empid, deptno from emps group by empid, deptno; + +select empid, deptno from emps group by empid, deptno; + +drop materialized view mv1; + +-- EXAMPLE 5 - NO MV, ALREADY UNIQUE +create materialized view mv1 enable rewrite as +select empid, name from emps group by empid, name; +analyze table mv1 compute statistics for columns; + +explain +select empid, name from emps group by empid, name; + +select empid, name from emps group by empid, name; + +drop materialized view mv1; + +-- EXAMPLE 5 +create materialized view mv1 enable rewrite as +select name, salary from emps group by name, salary; +analyze table mv1 compute statistics for columns; + +explain +select name, salary from emps group by name, salary; + +select name, salary from emps group by name, salary; + +drop materialized view mv1; + +-- EXAMPLE 6 +create materialized view mv1 enable rewrite as +select name, salary from emps group by name, salary; +analyze table mv1 compute statistics for columns; + +explain +select name from emps group by name; + +select name from emps group by name; + +drop materialized view mv1; + +-- EXAMPLE 7 +create materialized view mv1 enable rewrite as +select name, salary from emps where deptno = 10 group by name, salary; +analyze table mv1 compute statistics for columns; + +explain +select name from emps where deptno = 10 group by name; + +select name from emps where deptno = 10 group by name; + +drop materialized view mv1; + +-- EXAMPLE 9 +create materialized view mv1 enable rewrite as +select name, salary, count(*) as c, sum(empid) as s +from emps group by name, salary; +analyze table mv1 compute statistics for columns; + +explain +select name from emps group by name; + +select name from emps group by name; + +drop materialized view mv1; diff --git ql/src/test/queries/clientpositive/materialized_view_rewrite_2.q ql/src/test/queries/clientpositive/materialized_view_rewrite_2.q new file mode 100644 index 0000000..1688388 --- /dev/null +++ ql/src/test/queries/clientpositive/materialized_view_rewrite_2.q @@ -0,0 +1,163 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250); +analyze table emps compute statistics for columns; + +create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); +analyze table depts compute statistics for columns; + +create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into dependents values (10, 'Michael'), (10, 'Jane'); +analyze table dependents compute statistics for columns; + +create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into locations values (10, 'San Francisco'), (10, 'San Diego'); +analyze table locations compute statistics for columns; + +alter table emps add constraint pk1 primary key (empid) 
disable novalidate rely; +alter table depts add constraint pk2 primary key (deptno) disable novalidate rely; +alter table dependents add constraint pk3 primary key (empid) disable novalidate rely; +alter table locations add constraint pk4 primary key (locationid) disable novalidate rely; + +alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely; +alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely; + +-- EXAMPLE 16 +create materialized view mv1 enable rewrite as +select empid, depts.deptno from emps +join depts using (deptno) where depts.deptno > 10 +group by empid, depts.deptno; +analyze table mv1 compute statistics for columns; + +explain +select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno; + +select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno; + +drop materialized view mv1; + +-- EXAMPLE 17 +create materialized view mv1 enable rewrite as +select depts.deptno, empid from depts +join emps using (deptno) where depts.deptno > 10 +group by empid, depts.deptno; +analyze table mv1 compute statistics for columns; + +explain +select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno; + +select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno; + +drop materialized view mv1; + +-- EXAMPLE 18 +create materialized view mv1 enable rewrite as +select empid, depts.deptno from emps +join depts using (deptno) where emps.deptno > 10 +group by empid, depts.deptno; +analyze table mv1 compute statistics for columns; + +explain +select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno; + +select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno; + +drop materialized view mv1; + +-- EXAMPLE 19 +create materialized view mv1 enable rewrite as +select depts.deptno, emps.empid from depts +join emps using (deptno) where emps.empid > 10 +group by depts.deptno, emps.empid; +analyze table mv1 compute statistics for columns; + +explain +select depts.deptno from depts +join emps using (deptno) where emps.empid > 15 +group by depts.deptno, emps.empid; + +select depts.deptno from depts +join emps using (deptno) where emps.empid > 15 +group by depts.deptno, emps.empid; + +drop materialized view mv1; + +-- EXAMPLE 20 +create materialized view mv1 enable rewrite as +select depts.deptno, emps.empid from depts +join emps using (deptno) where emps.empid > 10 +group by depts.deptno, emps.empid; +analyze table mv1 compute statistics for columns; + +explain +select depts.deptno from depts +join emps using (deptno) where emps.empid > 15 +group by depts.deptno; + +select depts.deptno from depts +join emps using (deptno) where emps.empid > 15 +group by depts.deptno; + +drop materialized view mv1; + +-- EXAMPLE 23 +create materialized view mv1 enable rewrite as +select depts.name, dependents.name as name2, emps.deptno, depts.deptno as deptno2, dependents.empid +from depts, dependents, emps +where depts.deptno > 10 +group by depts.name, dependents.name, emps.deptno, depts.deptno, dependents.empid; +analyze table mv1 compute statistics for columns; + +explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 +group by 
dependents.empid; + +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 +group by dependents.empid; + +drop materialized view mv1; diff --git ql/src/test/queries/clientpositive/materialized_view_rewrite_3.q ql/src/test/queries/clientpositive/materialized_view_rewrite_3.q new file mode 100644 index 0000000..902e616 --- /dev/null +++ ql/src/test/queries/clientpositive/materialized_view_rewrite_3.q @@ -0,0 +1,114 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250); +analyze table emps compute statistics for columns; + +create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); +analyze table depts compute statistics for columns; + +create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into dependents values (10, 'Michael'), (10, 'Jane'); +analyze table dependents compute statistics for columns; + +create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into locations values (10, 'San Francisco'), (10, 'San Diego'); +analyze table locations compute statistics for columns; + +alter table emps add constraint pk1 primary key (empid) disable novalidate rely; +alter table depts add constraint pk2 primary key (deptno) disable novalidate rely; +alter table dependents add constraint pk3 primary key (empid) disable novalidate rely; +alter table locations add constraint pk4 primary key (locationid) disable novalidate rely; + +alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely; +alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely; + +-- EXAMPLE 34 +create materialized view mv1 enable rewrite as +select empid deptno from emps +join depts using (deptno); +analyze table mv1 compute statistics for columns; + +explain +select empid deptno from emps +join depts using (deptno) where empid = 1; + +select empid deptno from emps +join depts using (deptno) where empid = 1; + +drop materialized view mv1; + +-- EXAMPLE 35 +create materialized view mv1 enable rewrite as +select cast(empid as BIGINT) from emps +join depts using (deptno); +analyze table mv1 compute statistics for columns; + +explain +select empid deptno from emps +join depts using (deptno) where empid > 1; + +select empid deptno from emps +join depts using (deptno) where empid > 1; + +drop materialized view mv1; + +-- EXAMPLE 36 +create materialized view mv1 enable rewrite as +select cast(empid as BIGINT) from emps +join depts using (deptno); +analyze table mv1 compute statistics for columns; + +explain +select empid deptno from emps +join depts using (deptno) where empid = 1; + +select empid deptno from emps +join 
depts using (deptno) where empid = 1; + +drop materialized view mv1; + +-- EXAMPLE 38 +create materialized view mv1 enable rewrite as +select depts.name +from emps +join depts on (emps.deptno = depts.deptno); +analyze table mv1 compute statistics for columns; + +explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno); + +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno); + +drop materialized view mv1; + diff --git ql/src/test/queries/clientpositive/materialized_view_rewrite_4.q ql/src/test/queries/clientpositive/materialized_view_rewrite_4.q new file mode 100644 index 0000000..2c0fadb --- /dev/null +++ ql/src/test/queries/clientpositive/materialized_view_rewrite_4.q @@ -0,0 +1,177 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250); +analyze table emps compute statistics for columns; + +create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); +analyze table depts compute statistics for columns; + +create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into dependents values (10, 'Michael'), (10, 'Jane'); +analyze table dependents compute statistics for columns; + +create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into locations values (10, 'San Francisco'), (10, 'San Diego'); +analyze table locations compute statistics for columns; + +alter table emps add constraint pk1 primary key (empid) disable novalidate rely; +alter table depts add constraint pk2 primary key (deptno) disable novalidate rely; +alter table dependents add constraint pk3 primary key (empid) disable novalidate rely; +alter table locations add constraint pk4 primary key (locationid) disable novalidate rely; + +alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely; +alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely; + + +-- EXAMPLE 10 +create materialized view mv1 enable rewrite as +select name, salary, count(*) as c, sum(empid) as s +from emps group by name, salary; +analyze table mv1 compute statistics for columns; + +explain +select name, count(*) as c, sum(empid) as s +from emps group by name; + +select name, count(*) as c, sum(empid) as s +from emps group by name; + +drop materialized view mv1; + +-- EXAMPLE 11 +create materialized view mv1 enable rewrite as +select name, salary, count(*) as c, sum(empid) as s +from emps group by name, salary; +analyze table mv1 compute statistics for columns; + +explain +select salary, name, sum(empid) as s, count(*) as c +from emps group by name, salary; 
+ +select salary, name, sum(empid) as s, count(*) as c +from emps group by name, salary; + +drop materialized view mv1; + +-- EXAMPLE 25 +create materialized view mv1 enable rewrite as +select empid, emps.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by empid, emps.deptno; +analyze table mv1 compute statistics for columns; + +explain +select depts.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by depts.deptno; + +select depts.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by depts.deptno; + +drop materialized view mv1; + +-- EXAMPLE 27 +create materialized view mv1 enable rewrite as +select empid, emps.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +where emps.deptno >= 10 group by empid, emps.deptno; +analyze table mv1 compute statistics for columns; + +explain +select depts.deptno, sum(empid) as s +from emps join depts using (deptno) +where emps.deptno > 10 group by depts.deptno; + +select depts.deptno, sum(empid) as s +from emps join depts using (deptno) +where emps.deptno > 10 group by depts.deptno; + +drop materialized view mv1; + +-- EXAMPLE 28 +create materialized view mv1 enable rewrite as +select empid, depts.deptno, count(*) + 1 as c, sum(empid) as s +from emps join depts using (deptno) +where depts.deptno >= 10 group by empid, depts.deptno; +analyze table mv1 compute statistics for columns; + +explain +select depts.deptno, sum(empid) + 1 as s +from emps join depts using (deptno) +where depts.deptno > 10 group by depts.deptno; + +select depts.deptno, sum(empid) + 1 as s +from emps join depts using (deptno) +where depts.deptno > 10 group by depts.deptno; + +drop materialized view mv1; + +-- EXAMPLE 29 +create materialized view mv1 enable rewrite as +select depts.name, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +group by depts.name; +analyze table mv1 compute statistics for columns; + +explain +select dependents.empid, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (depts.name = dependents.name) +group by dependents.empid; + +select dependents.empid, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (depts.name = dependents.name) +group by dependents.empid; + +drop materialized view mv1; + +-- EXAMPLE 32 +create materialized view mv1 enable rewrite as +select dependents.empid, emps.deptno, count(distinct salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno; +analyze table mv1 compute statistics for columns; + +explain +select emps.deptno, count(distinct salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno; + +select emps.deptno, count(distinct salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno; + +drop materialized view mv1; diff --git ql/src/test/queries/clientpositive/materialized_view_rewrite_5.q ql/src/test/queries/clientpositive/materialized_view_rewrite_5.q new file mode 100644 index 0000000..fc4445c --- /dev/null +++ ql/src/test/queries/clientpositive/materialized_view_rewrite_5.q @@ -0,0 +1,296 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set 
hive.materializedview.rewriting=true; + +create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250); +analyze table emps compute statistics for columns; + +create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); +analyze table depts compute statistics for columns; + +create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into dependents values (10, 'Michael'), (10, 'Jane'); +analyze table dependents compute statistics for columns; + +create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into locations values (10, 'San Francisco'), (10, 'San Diego'); +analyze table locations compute statistics for columns; + +alter table emps add constraint pk1 primary key (empid) disable novalidate rely; +alter table depts add constraint pk2 primary key (deptno) disable novalidate rely; +alter table dependents add constraint pk3 primary key (empid) disable novalidate rely; +alter table locations add constraint pk4 primary key (locationid) disable novalidate rely; + +alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely; +alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely; + +alter table emps change column deptno deptno int constraint nn1 not null disable novalidate rely; +alter table depts change column locationid locationid int constraint nn2 not null disable novalidate rely; + + +-- EXAMPLE 8 +create materialized view mv1 enable rewrite as +select name, deptno, salary from emps where deptno > 15 group by name, deptno, salary; +analyze table mv1 compute statistics for columns; + +explain +select name from emps where deptno >= 20 group by name; + +select name from emps where deptno >= 20 group by name; + +drop materialized view mv1; + +-- EXAMPLE 12 +create materialized view mv1 enable rewrite as +select name, deptno, salary, count(*) as c, sum(empid) as s +from emps where deptno >= 15 group by name, deptno, salary; +analyze table mv1 compute statistics for columns; + +explain +select name, sum(empid) as s +from emps where deptno > 15 group by name; + +select name, sum(empid) as s +from emps where deptno > 15 group by name; + +drop materialized view mv1; + +-- EXAMPLE 22 +create materialized view mv1 enable rewrite as +select depts.deptno, dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 and depts.deptno < 20 +group by depts.deptno, dependents.empid; +analyze table mv1 compute statistics for columns; + +explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 11 and depts.deptno < 19 +group by dependents.empid; + +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join 
locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 11 and depts.deptno < 19 +group by dependents.empid; + +drop materialized view mv1; + +-- EXAMPLE 24 +create materialized view mv1 enable rewrite as +select empid, depts.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by empid, depts.deptno; +analyze table mv1 compute statistics for columns; + +explain +select deptno from emps group by deptno; + +select deptno from emps group by deptno; + +drop materialized view mv1; + +-- EXAMPLE 26 +create materialized view mv1 enable rewrite as +select empid, depts.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by empid, depts.deptno; +analyze table mv1 compute statistics for columns; + +explain +select deptno, empid, sum(empid) as s, count(*) as c +from emps group by empid, deptno; + +select deptno, empid, sum(empid) as s, count(*) as c +from emps group by empid, deptno; + +drop materialized view mv1; + +-- EXAMPLE 30 +create materialized view mv1 enable rewrite as +select dependents.empid, emps.deptno, sum(salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno; +analyze table mv1 compute statistics for columns; + +explain +select dependents.empid, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +group by dependents.empid; + +select dependents.empid, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +group by dependents.empid; + +drop materialized view mv1; + +-- EXAMPLE 31 +create materialized view mv1 enable rewrite as +select dependents.empid, emps.deptno, sum(salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno; +analyze table mv1 compute statistics for columns; + +explain +select depts.name, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +group by depts.name; + +select depts.name, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +group by depts.name; + +drop materialized view mv1; + +-- EXAMPLE 41 +create materialized view mv1 enable rewrite as +select a.empid deptno from +(select * from emps where empid = 1) a +join depts on (a.deptno = depts.deptno) +join dependents on (a.empid = dependents.empid); +analyze table mv1 compute statistics for columns; + +explain +select a.empid from +(select * from emps where empid = 1) a +join dependents on (a.empid = dependents.empid); + +select a.empid from +(select * from emps where empid = 1) a +join dependents on (a.empid = dependents.empid); + +drop materialized view mv1; + +-- EXAMPLE 42 +create materialized view mv1 enable rewrite as +select a.empid, a.deptno from +(select * from emps where empid = 1) a +join depts on (a.deptno = depts.deptno) +join dependents on (a.empid = dependents.empid); +analyze table mv1 compute statistics for columns; + +explain +select a.empid from +(select * from emps where empid = 1) a +join dependents on (a.empid = dependents.empid); + +select a.empid from +(select * from emps where empid = 1) a +join dependents on (a.empid = dependents.empid); + +drop materialized view mv1; + +-- EXAMPLE 43 +create materialized view mv1 enable rewrite as +select empid deptno from +(select * from 
emps where empid = 1) a +join depts on (a.deptno = depts.deptno); +analyze table mv1 compute statistics for columns; + +explain +select empid from emps where empid = 1; + +select empid from emps where empid = 1; + +drop materialized view mv1; + +-- EXAMPLE 44 +create materialized view mv1 enable rewrite as +select emps.empid, emps.deptno from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1; +analyze table mv1 compute statistics for columns; + +explain +select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1; + +select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1; + +drop materialized view mv1; + +-- EXAMPLE 45a +create materialized view mv1 enable rewrite as +select emps.empid, emps.deptno from emps +join depts a on (emps.deptno=a.deptno) +join depts b on (emps.deptno=b.deptno) +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1; +analyze table mv1 compute statistics for columns; + +explain +select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1; + +select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1; + +drop materialized view mv1; + +-- EXAMPLE 45b +create materialized view mv1 enable rewrite as +select emps.empid, emps.deptno from emps +join depts a on (emps.deptno=a.deptno) +join depts b on (emps.deptno=b.deptno) +join dependents on (emps.empid = dependents.empid) +where emps.name = 'Sebastian'; +analyze table mv1 compute statistics for columns; + +explain +select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.name = 'Sebastian'; + +select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.name = 'Sebastian'; + +drop materialized view mv1; diff --git ql/src/test/queries/clientpositive/materialized_view_rewrite_6.q ql/src/test/queries/clientpositive/materialized_view_rewrite_6.q new file mode 100644 index 0000000..cc844c5 --- /dev/null +++ ql/src/test/queries/clientpositive/materialized_view_rewrite_6.q @@ -0,0 +1,145 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250); +analyze table emps compute statistics for columns; + +create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); +analyze table depts compute statistics for columns; + +create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into dependents values (10, 'Michael'), (10, 'Jane'); +analyze table dependents compute statistics for columns; + +create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into locations values (10, 'San 
Francisco'), (10, 'San Diego'); +analyze table locations compute statistics for columns; + +alter table emps add constraint pk1 primary key (empid) disable novalidate rely; +alter table depts add constraint pk2 primary key (deptno) disable novalidate rely; +alter table dependents add constraint pk3 primary key (empid) disable novalidate rely; +alter table locations add constraint pk4 primary key (locationid) disable novalidate rely; + +alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely; +alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely; + +alter table emps change column deptno deptno int constraint nn1 not null disable novalidate rely; +alter table depts change column locationid locationid int constraint nn2 not null disable novalidate rely; + + +-- EXAMPLE 13 +create materialized view mv1 enable rewrite as +select name, deptno, salary, count(*) + 1 as c, sum(empid) as s +from emps where deptno >= 10 group by name, deptno, salary; +analyze table mv1 compute statistics for columns; + +explain +select salary, sum(empid) + 1 as s +from emps where deptno > 10 group by salary; + +select salary, sum(empid) + 1 as s +from emps where deptno > 10 group by salary; + +drop materialized view mv1; + +-- EXAMPLE 14 +create materialized view mv1 enable rewrite as +select name, deptno, salary, count(*) + 1 as c, sum(empid) as s +from emps where deptno >= 15 group by name, deptno, salary; +analyze table mv1 compute statistics for columns; + +explain +select salary + 1, sum(empid) + 1 as s +from emps where deptno > 15 group by salary; + +select salary + 1, sum(empid) + 1 as s +from emps where deptno > 15 group by salary; + +drop materialized view mv1; + +-- EXAMPLE 37 +create materialized view mv1 enable rewrite as +select depts.name +from emps +join depts on (emps.deptno = depts.deptno); +analyze table mv1 compute statistics for columns; + +explain +select dependents.empid +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (depts.name = dependents.name); + +select dependents.empid +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (depts.name = dependents.name); + +drop materialized view mv1; + +-- EXAMPLE 39 +create materialized view mv1 enable rewrite as +select depts.name +from emps +join depts on (emps.deptno = depts.deptno); +analyze table mv1 compute statistics for columns; + +explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno); + +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno); + +drop materialized view mv1; + +-- EXAMPLE 46 +create materialized view mv1 enable rewrite as +select emps.empid, emps.deptno, emps.name as name1, emps.salary, emps.commission, dependents.name as name2 +from emps join dependents on (emps.empid = dependents.empid); +analyze table mv1 compute statistics for columns; + +explain +select emps.empid, dependents.empid, emps.deptno +from emps +join dependents on (emps.empid = dependents.empid) +join depts a on (emps.deptno=a.deptno) +where emps.name = 'Bill'; + +select emps.empid, dependents.empid, emps.deptno +from emps +join dependents on (emps.empid = dependents.empid) +join depts a on (emps.deptno=a.deptno) +where emps.name = 'Bill'; + 
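+-- mv1 covers only the emps/dependents join, so the rewrite is expected to
+-- keep just the extra join against depts, i.e. something equivalent to the
+-- commented-out sketch below (name1 is mv1's alias for emps.name; both empid
+-- columns come from mv1.empid because the view joins on empid):
+--   select mv1.empid, mv1.empid, mv1.deptno
+--   from mv1
+--   join depts a on (mv1.deptno = a.deptno)
+--   where mv1.name1 = 'Bill';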
+drop materialized view mv1; diff --git ql/src/test/queries/clientpositive/materialized_view_rewrite_7.q ql/src/test/queries/clientpositive/materialized_view_rewrite_7.q new file mode 100644 index 0000000..66ff561 --- /dev/null +++ ql/src/test/queries/clientpositive/materialized_view_rewrite_7.q @@ -0,0 +1,137 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250); +analyze table emps compute statistics for columns; + +create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); +analyze table depts compute statistics for columns; + +create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into dependents values (10, 'Michael'), (10, 'Jane'); +analyze table dependents compute statistics for columns; + +create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into locations values (10, 'San Francisco'), (10, 'San Diego'); +analyze table locations compute statistics for columns; + +alter table emps add constraint pk1 primary key (empid) disable novalidate rely; +alter table depts add constraint pk2 primary key (deptno) disable novalidate rely; +alter table dependents add constraint pk3 primary key (empid) disable novalidate rely; +alter table locations add constraint pk4 primary key (locationid) disable novalidate rely; + +alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely; +alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely; + +alter table emps change column deptno deptno int constraint nn1 not null disable novalidate rely; +alter table depts change column locationid locationid int constraint nn2 not null disable novalidate rely; + + +-- EXAMPLE 21 -- WORKS NOW +create materialized view mv1 enable rewrite as +select depts.deptno, dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 11 +group by depts.deptno, dependents.empid; +analyze table mv1 compute statistics for columns; + +explain +select dependents.empid, depts.deptno +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 +group by dependents.empid, depts.deptno; + +select dependents.empid, depts.deptno +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 +group by dependents.empid, depts.deptno; + +drop materialized view mv1; + +-- EXAMPLE 33 +create materialized view mv1 enable rewrite as +select 
depts.deptno, dependents.empid, count(emps.salary) as s +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 11 and depts.deptno < 19 +group by depts.deptno, dependents.empid; +analyze table mv1 compute statistics for columns; + +explain +select dependents.empid, count(emps.salary) + 1 +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 and depts.deptno < 20 +group by dependents.empid; + +select dependents.empid, count(emps.salary) + 1 +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 and depts.deptno < 20 +group by dependents.empid; + +drop materialized view mv1; + +-- EXAMPLE 40 -- REWRITING HAPPENS BUT DISCARDED +-- DUE TO COST EXCEPT WITH HEURISTICS +create materialized view mv1 enable rewrite as +select depts.deptno, dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno >= 10; +analyze table mv1 compute statistics for columns; + +explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 0; + +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 0; + +drop materialized view mv1; diff --git ql/src/test/queries/clientpositive/materialized_view_rewrite_8.q ql/src/test/queries/clientpositive/materialized_view_rewrite_8.q new file mode 100644 index 0000000..837bf5b --- /dev/null +++ ql/src/test/queries/clientpositive/materialized_view_rewrite_8.q @@ -0,0 +1,116 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +create table if not exists source_table_001 ( +MY_DATE date, +MY_ID bigint, +MY_ID2 bigint, +ENVIRONMENT string, +DOWN_VOLUME bigint, +UP_VOLUME bigint +) +stored AS ORC +TBLPROPERTIES("transactional"="true"); +insert into table source_table_001 + values ('2010-10-10', 1, 1, 'env', 1, 1); +analyze table source_table_001 compute statistics for columns; + +CREATE MATERIALIZED VIEW source_table_001_mv ENABLE REWRITE AS +SELECT +SUM(A.DOWN_VOLUME) AS DOWN_VOLUME_SUM, +SUM(A.UP_VOLUME) AS UP_VOLUME_SUM, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +from source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE; +analyze table source_table_001_mv compute statistics for columns; + + +explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE; + +explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +LIMIT 100; + +explain +select +1, +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE; + +explain +select +SUM(A.DOWN_VOLUME) + 0 AS DOWNLOAD_VOLUME_BYTES, 
+A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE; + +-- DOES NOT WORK - PROBLEM IN FIELD TRIMMER WITH OBY +explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +ORDER BY A.MY_ID2 +LIMIT 100; + +-- WORKS WITH COLUMN STATS, CBO FAILS WITHOUT +explain +select +distinct A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A; + +explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE; + +explain +select +SUM(A.DOWN_VOLUME) + SUM(A.UP_VOLUME) AS TOTAL_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE; + +explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10'); + +explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +TO_DATE('2010-01-10') +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10'); + +explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +group by A.MY_DATE; + +drop materialized view source_table_001_mv; diff --git ql/src/test/queries/clientpositive/materialized_view_rewrite_9.q ql/src/test/queries/clientpositive/materialized_view_rewrite_9.q new file mode 100644 index 0000000..504b15f --- /dev/null +++ ql/src/test/queries/clientpositive/materialized_view_rewrite_9.q @@ -0,0 +1,71 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +create table if not exists source_table_001 ( +MY_DATE timestamp, +MY_ID bigint, +MY_ID2 bigint, +ENVIRONMENT string, +DOWN_VOLUME bigint, +UP_VOLUME bigint +) +stored AS ORC +TBLPROPERTIES("transactional"="true"); +insert into table source_table_001 + values ('2010-10-10 00:00:00', 1, 1, 'env', 1, 1); +analyze table source_table_001 compute statistics for columns; + +CREATE MATERIALIZED VIEW source_table_001_mv ENABLE REWRITE AS +SELECT +SUM(A.DOWN_VOLUME) AS DOWN_VOLUME_SUM, +SUM(A.UP_VOLUME) AS UP_VOLUME_SUM, +A.MY_ID,A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +from source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE; +analyze table source_table_001_mv compute statistics for columns; + +explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +FLOOR(A.MY_DATE to hour),A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to hour); + +DROP MATERIALIZED VIEW source_table_001_mv; + +CREATE MATERIALIZED VIEW source_table_001_mv ENABLE REWRITE AS +SELECT +SUM(A.DOWN_VOLUME) AS DOWN_VOLUME_SUM, +SUM(A.UP_VOLUME) AS UP_VOLUME_SUM, +A.MY_ID,FLOOR(A.MY_DATE to hour),A.MY_ID2,A.ENVIRONMENT +from source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to hour); +analyze table source_table_001_mv compute statistics for columns; + +explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +FLOOR(A.MY_DATE to day),A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to 
day); + +explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +FLOOR(A.MY_DATE to hour),A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to hour); + +explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +FLOOR(A.MY_DATE to second),A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to second); + +DROP MATERIALIZED VIEW source_table_001_mv; diff --git ql/src/test/results/clientnegative/alter_non_native.q.out ql/src/test/results/clientnegative/alter_non_native.q.out index 0a32067..bd8fb4f 100644 --- ql/src/test/results/clientnegative/alter_non_native.q.out +++ ql/src/test/results/clientnegative/alter_non_native.q.out @@ -8,4 +8,4 @@ STORED BY 'org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler' POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@non_native1 -FAILED: SemanticException [Error 10134]: ALTER TABLE can only be used for [ADDPROPS, DROPPROPS] to a non-native table non_native1 +FAILED: SemanticException [Error 10134]: ALTER TABLE can only be used for [ADDPROPS, DROPPROPS, ADDCOLS] to a non-native table non_native1 diff --git ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out index 204c35b..1574fc7 100644 --- ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out +++ ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out @@ -120,7 +120,7 @@ STAGE PLANS: alias: default.cmv_mat_view2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: a (type: int), c (type: decimal(10,2)) + expressions: 3 (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE ListSink @@ -266,7 +266,7 @@ POSTHOOK: query: alter materialized view cmv_mat_view2 enable rewrite POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE POSTHOOK: Input: default@cmv_mat_view2 POSTHOOK: Output: default@cmv_mat_view2 -Warning: Shuffle Join JOIN[4][tables = [test_db_materialized_view_create_rewrite.cmv_mat_view2, cmv_basetable]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from ( (select a, c from cmv_basetable where a = 3) table1 @@ -292,17 +292,28 @@ STAGE PLANS: TableScan alias: default.cmv_mat_view2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: c (type: decimal(10,2)) + outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE - value expressions: a (type: int), c (type: decimal(10,2)) + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(10,2)) TableScan alias: cmv_basetable Statistics: Num rows: 5 Data size: 1205 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 1205 Basic stats: COMPLETE Column stats: NONE - value expressions: a (type: int), c (type: decimal(10,2)), d (type: int) + Filter Operator + predicate: ((a = 3) and (d = 3)) (type: boolean) + Statistics: Num rows: 1 Data size: 241 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: decimal(10,2)) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 241 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 241 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(10,2)) Reduce Operator Tree: Join Operator condition map: @@ -310,22 +321,19 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col5, _col7, _col8 - Statistics: Num rows: 10 Data size: 3580 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 = 3) and (_col8 = 3)) (type: boolean) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 3 (type: int), _col0 (type: decimal(10,2)), 3 (type: int), _col2 (type: decimal(10,2)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: decimal(10,2)), _col0 (type: int), _col7 (type: decimal(10,2)) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -333,7 +341,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[4][tables = [test_db_materialized_view_create_rewrite.cmv_mat_view2, cmv_basetable]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from ( (select a, c from cmv_basetable where a = 3) table1 join diff --git ql/src/test/results/clientpositive/complex_alias.q.out ql/src/test/results/clientpositive/complex_alias.q.out index c690e8f..06bbadc 100644 --- ql/src/test/results/clientpositive/complex_alias.q.out +++ ql/src/test/results/clientpositive/complex_alias.q.out @@ -17,7 +17,7 @@ POSTHOOK: Output: default@agg1 POSTHOOK: Lineage: agg1.col0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: agg1.col1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: agg1.col2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT single_use_subq11.a1 AS a1, single_use_subq11.a2 AS a2 @@ -207,7 +207,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: 
SELECT single_use_subq11.a1 AS a1, single_use_subq11.a2 AS a2 FROM (SELECT Sum(agg1.col2) AS a1 diff --git ql/src/test/results/clientpositive/druid/druidmini_mv.q.out ql/src/test/results/clientpositive/druid/druidmini_mv.q.out index 97f6d84..a87040a 100644 --- ql/src/test/results/clientpositive/druid/druidmini_mv.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_mv.q.out @@ -511,7 +511,7 @@ rawDataSize 0 storage_handler org.apache.hadoop.hive.druid.DruidStorageHandler totalSize 0 #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[8][tables = [cmv_mat_view2, cmv_basetable]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[10][tables = [cmv_mat_view2, $hdt$_0]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 @@ -541,26 +541,33 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: cmv_basetable + Statistics: Num rows: 30 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((a = 3) and (d = 3)) (type: boolean) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: double) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Map 3 + Map Operator Tree: + TableScan alias: cmv_mat_view2 properties: - druid.fieldNames vc,a,b,c,userid - druid.fieldTypes timestamp with local time zone,int,varchar(256),double,varchar(256) - druid.query.json {"queryType":"scan","dataSource":"default.cmv_mat_view2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","a","b","c","userid"],"resultFormat":"compactedList"} + druid.fieldNames a,c + druid.fieldTypes int,double + druid.query.json {"queryType":"scan","dataSource":"default.cmv_mat_view2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"columns":["a","c"],"resultFormat":"compactedList"} druid.query.type scan Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: a (type: int), c (type: double) - Map 3 - Map Operator Tree: - TableScan - alias: cmv_basetable - Statistics: Num rows: 30 Data size: 480 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 30 Data size: 480 Basic stats: COMPLETE Column stats: NONE - value expressions: a (type: int), c (type: double), d (type: int) Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -569,22 +576,19 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col3, _col9, _col12, _col13 - Statistics: Num rows: 90 Data size: 2610 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col13 = 3) and (_col9 = 3)) (type: boolean) - Statistics: Num rows: 22 Data size: 638 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: double), _col1 (type: int), _col12 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 22 Data size: 638 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - 
Statistics: Num rows: 22 Data size: 638 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 3 Data size: 87 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col0 (type: int), _col6 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 87 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 87 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -592,7 +596,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[8][tables = [cmv_mat_view2, cmv_basetable]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[10][tables = [cmv_mat_view2, $hdt$_0]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 JOIN diff --git ql/src/test/results/clientpositive/druid/druidmini_test_alter.q.out ql/src/test/results/clientpositive/druid/druidmini_test_alter.q.out new file mode 100644 index 0000000..9aef045 --- /dev/null +++ ql/src/test/results/clientpositive/druid/druidmini_test_alter.q.out @@ -0,0 +1,210 @@ +PREHOOK: query: CREATE TABLE druid_alltypesorc +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS + SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`, +cstring1, +cdouble, +cfloat, +ctinyint, +csmallint, +cint, +cbigint, +cboolean1 +FROM alltypesorc where ctimestamp2 IS NOT NULL +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: database:default +PREHOOK: Output: default@druid_alltypesorc +POSTHOOK: query: CREATE TABLE druid_alltypesorc +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS + SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`, +cstring1, +cdouble, +cfloat, +ctinyint, +csmallint, +cint, +cbigint, +cboolean1 +FROM alltypesorc where ctimestamp2 IS NOT NULL +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@druid_alltypesorc +POSTHOOK: Lineage: druid_alltypesorc.__time EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] +POSTHOOK: Lineage: druid_alltypesorc.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: druid_alltypesorc.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] +POSTHOOK: Lineage: druid_alltypesorc.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: druid_alltypesorc.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: 
druid_alltypesorc.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: druid_alltypesorc.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: druid_alltypesorc.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: druid_alltypesorc.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +PREHOOK: query: DESCRIBE druid_alltypesorc +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@druid_alltypesorc +POSTHOOK: query: DESCRIBE druid_alltypesorc +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@druid_alltypesorc +__time timestamp with local time zone from deserializer +cstring1 string from deserializer +cdouble double from deserializer +cfloat float from deserializer +ctinyint tinyint from deserializer +csmallint smallint from deserializer +cint int from deserializer +cbigint bigint from deserializer +cboolean1 boolean from deserializer +PREHOOK: query: DESCRIBE extended druid_alltypesorc +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@druid_alltypesorc +POSTHOOK: query: DESCRIBE extended druid_alltypesorc +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@druid_alltypesorc +__time timestamp with local time zone from deserializer +cstring1 string from deserializer +cdouble double from deserializer +cfloat float from deserializer +ctinyint tinyint from deserializer +csmallint smallint from deserializer +cint int from deserializer +cbigint bigint from deserializer +cboolean1 boolean from deserializer + +#### A masked pattern was here #### +PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +3033 +PREHOOK: query: ALTER TABLE druid_alltypesorc ADD COLUMNS (cstring2 string, cboolean2 boolean, cint2 int) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@druid_alltypesorc +PREHOOK: Output: default@druid_alltypesorc +POSTHOOK: query: ALTER TABLE druid_alltypesorc ADD COLUMNS (cstring2 string, cboolean2 boolean, cint2 int) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@druid_alltypesorc +POSTHOOK: Output: default@druid_alltypesorc +PREHOOK: query: DESCRIBE druid_alltypesorc +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@druid_alltypesorc +POSTHOOK: query: DESCRIBE druid_alltypesorc +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@druid_alltypesorc +__time timestamp with local time zone from deserializer +cstring1 string from deserializer +cdouble double from deserializer +cfloat float from deserializer +ctinyint tinyint from deserializer +csmallint smallint from deserializer +cint int from deserializer +cbigint bigint from deserializer +cboolean1 boolean from deserializer +cstring2 string from deserializer +cboolean2 boolean from deserializer +cint2 int from deserializer +PREHOOK: query: DESCRIBE extended druid_alltypesorc +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@druid_alltypesorc +POSTHOOK: query: DESCRIBE extended druid_alltypesorc +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@druid_alltypesorc +__time timestamp with local time zone from deserializer +cstring1 string from deserializer +cdouble double from deserializer +cfloat float from 
deserializer +ctinyint tinyint from deserializer +csmallint smallint from deserializer +cint int from deserializer +cbigint bigint from deserializer +cboolean1 boolean from deserializer +cstring2 string from deserializer +cboolean2 boolean from deserializer +cint2 int from deserializer + +#### A masked pattern was here #### +PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +PREHOOK: query: INSERT INTO TABLE druid_alltypesorc + SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, +cstring1, +cdouble, +cfloat, +ctinyint, +csmallint, +cint, +cbigint, +cboolean1, +cstring2, +cboolean2, +cint +FROM alltypesorc where ctimestamp1 IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@druid_alltypesorc +POSTHOOK: query: INSERT INTO TABLE druid_alltypesorc + SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, +cstring1, +cdouble, +cfloat, +ctinyint, +csmallint, +cint, +cbigint, +cboolean1, +cstring2, +cboolean2, +cint +FROM alltypesorc where ctimestamp1 IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@druid_alltypesorc +PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +9138 +PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +3041 +PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +6097 +PREHOOK: query: DROP TABLE druid_alltypesorc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@druid_alltypesorc +PREHOOK: Output: default@druid_alltypesorc +POSTHOOK: query: DROP TABLE druid_alltypesorc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@druid_alltypesorc +POSTHOOK: Output: default@druid_alltypesorc diff --git ql/src/test/results/clientpositive/druid_basic3.q.out ql/src/test/results/clientpositive/druid_basic3.q.out index ddce42e..96f6fe9 100644 --- ql/src/test/results/clientpositive/druid_basic3.q.out +++ ql/src/test/results/clientpositive/druid_basic3.q.out @@ -265,38 +265,26 @@ WHERE extract (week from `__time`) IN (10,11) GROUP BY language, robot POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table_1 - properties: - 
druid.fieldNames robot,language,$f2,$f3 - druid.fieldTypes string,string,double,double - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default"},"filter":{"type":"in","dimension":"__time","values":["10","11"],"extractionFn":{"type":"timeFormat","format":"w","timeZone":"US/Pacific","locale":"en-US"}},"aggregations":[{"type":"doubleSum","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"delta"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} - druid.query.type groupBy - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: language (type: string), robot (type: string), ($f2 - $f3) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: druid_table_1 + properties: + druid.fieldNames language,robot,a + druid.fieldTypes string,string,double + druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default"},"filter":{"type":"in","dimension":"__time","values":["10","11"],"extractionFn":{"type":"timeFormat","format":"w","timeZone":"US/Pacific","locale":"en-US"}},"aggregations":[{"type":"doubleSum","name":"$f2","fieldName":"added"},{"type":"doubleSum","name":"$f3","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"a","expression":"(\"$f2\" - \"$f3\")"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: language (type: string), robot (type: string), a (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink PREHOOK: query: EXPLAIN SELECT language, sum(delta) / count(*) AS a @@ -430,24 +418,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Fetch Operator - limit: 5 + limit: -1 Processor Tree: TableScan alias: druid_table_1 properties: druid.fieldNames robot,language druid.fieldTypes string,string - druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json 
{"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","limit":5,"columns":[]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: language (type: string), robot (type: string), 'A' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - ListSink + ListSink PREHOOK: query: EXPLAIN SELECT language, robot, "A" diff --git ql/src/test/results/clientpositive/llap/explainuser_1.q.out ql/src/test/results/clientpositive/llap/explainuser_1.q.out index ce53955..872f9c9 100644 --- ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -310,7 +310,7 @@ Stage-0 limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_7] + File Output Operator [FS_6] Select Operator [SEL_5] (rows=10 width=97) Output:["_col0","_col1","_col2"] Group By Operator [GBY_4] (rows=10 width=101) @@ -340,15 +340,15 @@ Stage-0 limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_12] - Select Operator [SEL_11] (rows=5 width=20) + File Output Operator [FS_11] + Select Operator [SEL_10] (rows=5 width=20) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_10] (rows=5 width=20) + Group By Operator [GBY_9] (rows=5 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] + SHUFFLE [RS_8] PartitionCols:_col0, _col1 - Group By Operator [GBY_8] (rows=5 width=20) + Group By Operator [GBY_7] (rows=5 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 Select Operator [SEL_5] (rows=10 width=101) Output:["_col0","_col1"] @@ -382,35 +382,35 @@ Stage-0 limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_31] - Select Operator [SEL_29] (rows=1 width=20) + File Output Operator [FS_29] + Select Operator [SEL_27] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] - Select Operator [SEL_27] (rows=1 width=28) + SHUFFLE [RS_26] + Select Operator [SEL_25] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_26] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_23] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) + Group By Operator [GBY_22] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col1 - Select Operator [SEL_23] (rows=1 width=20) + Select Operator [SEL_21] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_41] (rows=1 width=20) - Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 0)} + Merge Join Operator [MERGEJOIN_39] (rows=1 width=20) + 
Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 0)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + SHUFFLE [RS_17] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=18 width=84) + Filter Operator [FIL_36] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] + SHUFFLE [RS_18] PartitionCols:_col0 Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] @@ -421,25 +421,25 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_39] (rows=4 width=93) + Filter Operator [FIL_37] (rows=4 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] + SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=1 width=89) + Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=1 width=93) + Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] + SHUFFLE [RS_14] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=93) + Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_40] (rows=4 width=93) + Filter Operator [FIL_38] (rows=4 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) - TableScan [TS_11] (rows=20 width=88) + TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc) cbo_t1 left outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p left outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int % c asc, cbo_t3.c_int desc @@ -460,35 +460,35 @@ Stage-0 limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_31] - Select Operator [SEL_29] (rows=1 width=20) + File Output Operator [FS_29] + Select Operator [SEL_27] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] - Select Operator [SEL_27] (rows=1 width=28) + SHUFFLE [RS_26] + Select Operator [SEL_25] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_26] (rows=1 
width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_23] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) + Group By Operator [GBY_22] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_23] (rows=1 width=20) + Select Operator [SEL_21] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_41] (rows=1 width=20) - Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col1 >= 1) or (_col4 >= 1L))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} + Merge Join Operator [MERGEJOIN_39] (rows=1 width=20) + Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col1 >= 1) or (_col4 >= 1L))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + SHUFFLE [RS_17] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=18 width=84) + Filter Operator [FIL_36] (rows=18 width=84) predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] + SHUFFLE [RS_18] PartitionCols:_col0 Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] @@ -499,25 +499,25 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_39] (rows=2 width=93) + Filter Operator [FIL_37] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] + SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=1 width=89) + Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=1 width=93) + Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] + SHUFFLE [RS_14] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=93) + Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_40] (rows=2 width=93) + Filter Operator [FIL_38] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) - TableScan [TS_11] (rows=20 width=88) + TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having 
cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b+c, a desc) cbo_t1 right outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 2) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c @@ -537,29 +537,29 @@ Stage-0 limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_28] - Group By Operator [GBY_26] (rows=1 width=20) + File Output Operator [FS_26] + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_23] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) + Group By Operator [GBY_22] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_23] (rows=1 width=20) + Select Operator [SEL_21] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_38] (rows=1 width=20) - Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 2)} + Merge Join Operator [MERGEJOIN_36] (rows=1 width=20) + Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 2)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + SHUFFLE [RS_17] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_35] (rows=18 width=84) + Filter Operator [FIL_33] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] + SHUFFLE [RS_18] PartitionCols:_col0 Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] @@ -570,25 +570,25 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_36] (rows=2 width=93) + Filter Operator [FIL_34] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] + SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=1 width=89) + Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=1 width=93) + Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] + SHUFFLE [RS_14] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=93) + Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_37] (rows=2 width=93) + Filter Operator [FIL_35] (rows=2 width=93) 
predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) - TableScan [TS_11] (rows=20 width=88) + TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by c+a desc) cbo_t1 full outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by p+q desc, r asc) cbo_t2 on cbo_t1.a=p full outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int @@ -609,33 +609,33 @@ Stage-0 limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_30] - Select Operator [SEL_29] (rows=1 width=20) + File Output Operator [FS_28] + Select Operator [SEL_27] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] - Group By Operator [GBY_26] (rows=1 width=20) + SHUFFLE [RS_26] + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_23] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) + Group By Operator [GBY_22] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_23] (rows=1 width=20) + Select Operator [SEL_21] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_40] (rows=1 width=20) - Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col1 >= 1) or (_col4 >= 1L))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} + Merge Join Operator [MERGEJOIN_38] (rows=1 width=20) + Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col1 >= 1) or (_col4 >= 1L))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + SHUFFLE [RS_17] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_37] (rows=18 width=84) + Filter Operator [FIL_35] (rows=18 width=84) predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] + SHUFFLE [RS_18] PartitionCols:_col0 Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] @@ -646,25 +646,25 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_38] (rows=2 width=93) + Filter Operator [FIL_36] (rows=2 width=93) predicate:(((UDFToFloat(c_int) 
+ c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] + SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=1 width=89) + Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=1 width=93) + Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] + SHUFFLE [RS_14] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=93) + Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_39] (rows=2 width=93) + Filter Operator [FIL_37] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) - TableScan [TS_11] (rows=20 width=88) + TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c @@ -684,29 +684,29 @@ Stage-0 limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_28] - Group By Operator [GBY_26] (rows=1 width=20) + File Output Operator [FS_26] + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_23] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) + Group By Operator [GBY_22] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_23] (rows=1 width=20) + Select Operator [SEL_21] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_38] (rows=1 width=20) - Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 0)} + Merge Join Operator [MERGEJOIN_36] (rows=1 width=20) + Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 0)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + SHUFFLE [RS_17] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_35] (rows=18 width=84) + Filter Operator [FIL_33] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) 
default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] + SHUFFLE [RS_18] PartitionCols:_col0 Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] @@ -717,25 +717,25 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_36] (rows=2 width=93) + Filter Operator [FIL_34] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] + SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=1 width=89) + Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=1 width=93) + Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] + SHUFFLE [RS_14] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=93) + Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_37] (rows=2 width=93) + Filter Operator [FIL_35] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) - TableScan [TS_11] (rows=20 width=88) + TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc @@ -1213,13 +1213,13 @@ Stage-0 limit:1 Stage-1 Reducer 3 llap - File Output Operator [FS_10] - Limit [LIM_9] (rows=1 width=97) + File Output Operator [FS_9] + Limit [LIM_8] (rows=1 width=97) Number of rows:1 - Select Operator [SEL_8] (rows=10 width=97) + Select Operator [SEL_7] (rows=10 width=97) Output:["_col0","_col1","_col2"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] + SHUFFLE [RS_6] Select Operator [SEL_5] (rows=10 width=97) Output:["_col0","_col1","_col2"] Group By Operator [GBY_4] (rows=10 width=101) @@ -1250,19 +1250,19 @@ Stage-0 limit:1 Stage-1 Reducer 4 llap - File Output Operator [FS_15] - Limit [LIM_14] (rows=1 width=20) + File Output Operator [FS_14] + Limit [LIM_13] (rows=1 width=20) Number of rows:1 - Select Operator [SEL_13] (rows=5 width=20) + Select Operator [SEL_12] (rows=5 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - Group By Operator [GBY_10] (rows=5 width=20) + SHUFFLE [RS_11] + Group By Operator [GBY_9] (rows=5 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] + SHUFFLE [RS_8] PartitionCols:_col0, _col1 - Group By Operator [GBY_8] (rows=5 width=20) + Group By Operator [GBY_7] (rows=5 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 Select Operator [SEL_5] (rows=10 width=101) Output:["_col0","_col1"] @@ -1354,46 +1354,46 @@ Stage-0 limit:-1 Stage-1 Reducer 6 llap - File Output Operator [FS_44] - Limit [LIM_42] (rows=1 width=28) + 
File Output Operator [FS_42] + Limit [LIM_40] (rows=1 width=28) Number of rows:5 - Select Operator [SEL_41] (rows=1 width=28) + Select Operator [SEL_39] (rows=1 width=28) Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_40] - Select Operator [SEL_39] (rows=1 width=28) + SHUFFLE [RS_38] + Select Operator [SEL_37] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_38] (rows=1 width=20) + Group By Operator [GBY_36] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_37] + SHUFFLE [RS_35] PartitionCols:_col0, _col1 - Group By Operator [GBY_36] (rows=1 width=20) + Group By Operator [GBY_34] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col6 - Select Operator [SEL_35] (rows=2 width=20) + Select Operator [SEL_33] (rows=2 width=20) Output:["_col4","_col6"] - Merge Join Operator [MERGEJOIN_56] (rows=2 width=20) - Conds:RS_31._col0=RS_32._col0(Inner),RS_32._col0=RS_33._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 + _col1) >= 0)} {((_col3 > 0) or (_col6 >= 0))} + Merge Join Operator [MERGEJOIN_54] (rows=2 width=20) + Conds:RS_29._col0=RS_30._col0(Inner),RS_30._col0=RS_31._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 + _col1) >= 0)} {((_col3 > 0) or (_col6 >= 0))} <-Map 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_33] + SHUFFLE [RS_31] PartitionCols:_col0 - Select Operator [SEL_30] (rows=18 width=84) + Select Operator [SEL_28] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_55] (rows=18 width=84) + Filter Operator [FIL_53] (rows=18 width=84) predicate:key is not null - TableScan [TS_28] (rows=20 width=84) + TableScan [TS_26] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] + SHUFFLE [RS_29] PartitionCols:_col0 - Filter Operator [FIL_12] (rows=2 width=105) + Filter Operator [FIL_11] (rows=2 width=105) predicate:_col0 is not null - Limit [LIM_10] (rows=3 width=105) + Limit [LIM_9] (rows=3 width=105) Number of rows:5 - Select Operator [SEL_9] (rows=3 width=105) + Select Operator [SEL_8] (rows=3 width=105) Output:["_col0","_col1"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] + SHUFFLE [RS_7] Select Operator [SEL_6] (rows=3 width=105) Output:["_col0","_col1","_col2","_col3"] Group By Operator [GBY_5] (rows=3 width=101) @@ -1403,33 +1403,33 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_3] (rows=3 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_52] (rows=6 width=93) + Filter Operator [FIL_50] (rows=6 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0))) TableScan [TS_0] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_32] + SHUFFLE [RS_30] PartitionCols:_col0 - Filter Operator [FIL_26] (rows=2 width=97) + Filter Operator [FIL_24] (rows=2 width=97) predicate:_col0 is not null - Limit [LIM_24] (rows=3 width=97) + Limit [LIM_22] (rows=3 width=97) Number of rows:5 - Select Operator [SEL_23] (rows=3 width=97) + Select Operator [SEL_21] (rows=3 width=97) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - Select Operator [SEL_20] (rows=3 width=97) + SHUFFLE [RS_20] + 
Select Operator [SEL_19] (rows=3 width=97) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_19] (rows=3 width=101) + Group By Operator [GBY_18] (rows=3 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] + SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=3 width=101) + Group By Operator [GBY_16] (rows=3 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_54] (rows=6 width=93) + Filter Operator [FIL_52] (rows=6 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0))) - TableScan [TS_14] (rows=20 width=88) + TableScan [TS_13] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t1.c_int from cbo_t1 left semi join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) @@ -1541,35 +1541,35 @@ Stage-0 limit:-1 Stage-1 Reducer 5 llap - File Output Operator [FS_35] - Select Operator [SEL_34] (rows=1 width=101) + File Output Operator [FS_34] + Select Operator [SEL_33] (rows=1 width=101) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_33] - Select Operator [SEL_32] (rows=1 width=101) + SHUFFLE [RS_32] + Select Operator [SEL_31] (rows=1 width=101) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_31] (rows=1 width=101) + Group By Operator [GBY_30] (rows=1 width=101) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_30] + SHUFFLE [RS_29] PartitionCols:_col0, _col1 - Group By Operator [GBY_29] (rows=1 width=101) + Group By Operator [GBY_28] (rows=1 width=101) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 - Merge Join Operator [MERGEJOIN_45] (rows=1 width=93) - Conds:RS_24._col0=RS_25._col0(Left Semi),RS_24._col0=RS_26._col0(Left Semi),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_44] (rows=1 width=93) + Conds:RS_23._col0=RS_24._col0(Left Semi),RS_23._col0=RS_25._col0(Left Semi),Output:["_col0","_col1"] <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_26] + SHUFFLE [RS_25] PartitionCols:_col0 - Group By Operator [GBY_23] (rows=3 width=85) + Group By Operator [GBY_22] (rows=3 width=85) Output:["_col0"],keys:_col0 - Select Operator [SEL_19] (rows=6 width=85) + Select Operator [SEL_18] (rows=6 width=85) Output:["_col0"] - Filter Operator [FIL_44] (rows=6 width=85) + Filter Operator [FIL_43] (rows=6 width=85) predicate:(UDFToDouble(key) > 0.0D) - TableScan [TS_17] (rows=20 width=80) + TableScan [TS_16] (rows=20 width=80) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_24] + SHUFFLE [RS_23] PartitionCols:_col0 Select Operator [SEL_8] (rows=1 width=93) Output:["_col0","_col1"] @@ -1584,14 +1584,14 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_3] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_42] (rows=1 width=93) + Filter Operator [FIL_41] (rows=1 width=93) predicate:((((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0D)) and ((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and 
(UDFToDouble(key) > 0.0D) and (c_float > 0)) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_24] PartitionCols:_col0 - Group By Operator [GBY_21] (rows=1 width=85) + Group By Operator [GBY_20] (rows=1 width=85) Output:["_col0"],keys:_col0 Select Operator [SEL_15] (rows=1 width=85) Output:["_col0"] @@ -1602,7 +1602,7 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_12] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_43] (rows=1 width=93) + Filter Operator [FIL_42] (rows=1 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (UDFToDouble(key) > 0.0D) and (c_float > 0)) TableScan [TS_9] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -2144,11 +2144,11 @@ Stage-0 limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_22] - Merge Join Operator [MERGEJOIN_27] (rows=6 width=227) - Conds:RS_18._col1=RS_19._col0(Left Semi),Output:["_col0","_col1","_col2"] + File Output Operator [FS_21] + Merge Join Operator [MERGEJOIN_26] (rows=6 width=227) + Conds:RS_17._col1=RS_18._col0(Left Semi),Output:["_col0","_col1","_col2"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] + SHUFFLE [RS_17] PartitionCols:_col1 Select Operator [SEL_6] (rows=13 width=227) Output:["_col0","_col1","_col2"] @@ -2159,27 +2159,27 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_3] (rows=13 width=235) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(p_size)","count(p_size)"],keys:p_name, p_mfgr - Filter Operator [FIL_25] (rows=26 width=223) + Filter Operator [FIL_24] (rows=26 width=223) predicate:p_name is not null TableScan [TS_0] (rows=26 width=223) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_17] (rows=13 width=184) + Group By Operator [GBY_16] (rows=13 width=184) Output:["_col0"],keys:_col0 - Select Operator [SEL_12] (rows=26 width=184) + Select Operator [SEL_11] (rows=26 width=184) Output:["_col0"] - Filter Operator [FIL_26] (rows=26 width=491) + Filter Operator [FIL_25] (rows=26 width=491) predicate:first_value_window_0 is not null - PTF Operator [PTF_11] (rows=26 width=491) + PTF Operator [PTF_10] (rows=26 width=491) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] - Select Operator [SEL_10] (rows=26 width=491) + Select Operator [SEL_9] (rows=26 width=491) Output:["_col1","_col2","_col5"] <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] + SHUFFLE [RS_8] PartitionCols:p_mfgr - TableScan [TS_8] (rows=26 width=223) + TableScan [TS_7] (rows=26 width=223) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"] PREHOOK: query: explain select * diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out index 617ae3b..7d46874 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out @@ -119,7 +119,7 @@ STAGE PLANS: TableScan alias: default.cmv_mat_view2 Select Operator - expressions: a (type: int), 
c (type: decimal(10,2)) + expressions: 3 (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 ListSink @@ -278,7 +278,7 @@ POSTHOOK: query: alter materialized view cmv_mat_view2 enable rewrite POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE POSTHOOK: Input: default@cmv_mat_view2 POSTHOOK: Output: default@cmv_mat_view2 -Warning: Shuffle Join MERGEJOIN[8][tables = [default.cmv_mat_view2, cmv_basetable]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from ( (select a, c from cmv_basetable where a = 3) table1 @@ -309,11 +309,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.cmv_mat_view2 - Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE - value expressions: a (type: int), c (type: decimal(10,2)) + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: decimal(10,2)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(10,2)) Execution mode: llap LLAP IO: all inputs Map 3 @@ -321,10 +325,17 @@ STAGE PLANS: TableScan alias: cmv_basetable Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: a (type: int), c (type: decimal(10,2)), d (type: int) + Filter Operator + predicate: ((a = 3) and (d = 3)) (type: boolean) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c (type: decimal(10,2)) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(10,2)) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -336,22 +347,19 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col5, _col7, _col8 - Statistics: Num rows: 10 Data size: 2370 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 = 3) and (_col8 = 3)) (type: boolean) - Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: decimal(10,2)), _col0 (type: int), _col7 (type: decimal(10,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col2 + Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 3 (type: int), _col0 (type: decimal(10,2)), 3 (type: int), _col2 (type: 
decimal(10,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -359,7 +367,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[8][tables = [default.cmv_mat_view2, cmv_basetable]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from ( (select a, c from cmv_basetable where a = 3) table1 join diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out index 617ae3b..7d46874 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out @@ -119,7 +119,7 @@ STAGE PLANS: TableScan alias: default.cmv_mat_view2 Select Operator - expressions: a (type: int), c (type: decimal(10,2)) + expressions: 3 (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 ListSink @@ -278,7 +278,7 @@ POSTHOOK: query: alter materialized view cmv_mat_view2 enable rewrite POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE POSTHOOK: Input: default@cmv_mat_view2 POSTHOOK: Output: default@cmv_mat_view2 -Warning: Shuffle Join MERGEJOIN[8][tables = [default.cmv_mat_view2, cmv_basetable]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from ( (select a, c from cmv_basetable where a = 3) table1 @@ -309,11 +309,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.cmv_mat_view2 - Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE - value expressions: a (type: int), c (type: decimal(10,2)) + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: decimal(10,2)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(10,2)) Execution mode: llap LLAP IO: all inputs Map 3 @@ -321,10 +325,17 @@ STAGE PLANS: TableScan alias: cmv_basetable Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: a (type: int), c (type: decimal(10,2)), d (type: int) + Filter Operator + predicate: ((a = 3) and (d = 3)) (type: boolean) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c (type: decimal(10,2)) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + 
Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(10,2)) Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -336,22 +347,19 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col5, _col7, _col8 - Statistics: Num rows: 10 Data size: 2370 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 = 3) and (_col8 = 3)) (type: boolean) - Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: decimal(10,2)), _col0 (type: int), _col7 (type: decimal(10,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 474 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col2 + Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 3 (type: int), _col0 (type: decimal(10,2)), 3 (type: int), _col2 (type: decimal(10,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -359,7 +367,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[8][tables = [default.cmv_mat_view2, cmv_basetable]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from ( (select a, c from cmv_basetable where a = 3) table1 join diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out index 8930b80..af56032 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out @@ -155,7 +155,7 @@ STAGE PLANS: TableScan alias: db2.cmv_mat_view2 Select Operator - expressions: a (type: int), c (type: decimal(10,2)) + expressions: 3 (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 ListSink diff --git ql/src/test/results/clientpositive/llap/sharedworkext.q.out ql/src/test/results/clientpositive/llap/sharedworkext.q.out index e56b1ce..02470f7 100644 --- ql/src/test/results/clientpositive/llap/sharedworkext.q.out +++ ql/src/test/results/clientpositive/llap/sharedworkext.q.out @@ -616,15 +616,15 @@ STAGE PLANS: Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key 
expressions: 0 (type: int), _col0 (type: string) + key expressions: 0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: 0 (type: int), _col0 (type: string) + key expressions: 0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE @@ -633,24 +633,24 @@ STAGE PLANS: Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 404 Data size: 35148 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string + output shape: _col1: string type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 ASC NULLS FIRST + order by: _col1 ASC NULLS FIRST partition by: 0 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col0 + arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -691,24 +691,24 @@ STAGE PLANS: Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 404 Data size: 35148 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string + output shape: _col1: string type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 ASC NULLS FIRST + order by: _col1 ASC NULLS FIRST partition by: 0 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col0 + arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) diff --git ql/src/test/results/clientpositive/llap/subquery_in_having.q.out ql/src/test/results/clientpositive/llap/subquery_in_having.q.out index 79857ab..eb76ad8 100644 --- ql/src/test/results/clientpositive/llap/subquery_in_having.q.out +++ ql/src/test/results/clientpositive/llap/subquery_in_having.q.out @@ -1573,11 +1573,11 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@src_null POSTHOOK: Lineage: src_null.key SCRIPT [] POSTHOOK: Lineage: src_null.value EXPRESSION [] +Warning: Map Join MAPJOIN[122][bigTable=?] in task 'Map 1' is a cross product Warning: Map Join MAPJOIN[123][bigTable=?] in task 'Map 1' is a cross product Warning: Map Join MAPJOIN[124][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[125][bigTable=?] in task 'Map 1' is a cross product -Warning: Shuffle Join MERGEJOIN[126][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[127][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[125][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[126][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain select key, value, count(*) from src_null b @@ -2049,11 +2049,11 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[122][bigTable=?] 
in task 'Map 1' is a cross product Warning: Map Join MAPJOIN[123][bigTable=?] in task 'Map 1' is a cross product Warning: Map Join MAPJOIN[124][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[125][bigTable=?] in task 'Map 1' is a cross product -Warning: Shuffle Join MERGEJOIN[126][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[127][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[125][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[126][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product PREHOOK: query: select key, value, count(*) from src_null b where NOT EXISTS (select key from src_null where src_null.value <> b.value) diff --git ql/src/test/results/clientpositive/llap/subquery_notin.q.out ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 7b8e87a..2190dfd 100644 --- ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -2973,10 +2973,10 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1729 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1729 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3 @@ -5536,13 +5536,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) - Reducer 6 <- Map 10 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 8 <- Map 10 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 5 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5562,7 +5561,7 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs - Map 10 + Map 4 Map Operator Tree: TableScan alias: t2 @@ -5577,15 +5576,9 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 8 Map Operator Tree: TableScan alias: t1 @@ -5647,26 +5640,6 @@ STAGE PLANS: Reducer 5 Execution mode: llap Reduce Operator Tree: - Group By Operator - keys: 
KEY._col0 (type: char(100)) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: char(100)) - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: char(100)) - Reducer 6 - Execution mode: llap - Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 @@ -5676,8 +5649,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(), count(_col1) - keys: _col0 (type: char(100)) + aggregations: count(), count(_col0) + keys: _col1 (type: char(100)) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE @@ -5687,7 +5660,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: char(100)) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 7 + Group By Operator + keys: _col0 (type: int), _col1 (type: char(100)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: char(100)) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100)) + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5702,28 +5685,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: char(100)) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int), _col1 (type: char(100)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: char(100)) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100)) - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5744,6 +5706,20 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100)) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(100)) + mode: 
mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: char(100)) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/subquery_select.q.out ql/src/test/results/clientpositive/llap/subquery_select.q.out index 8ad0705..4c8fefc 100644 --- ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -5046,7 +5046,7 @@ POSTHOOK: Input: default@part 2 28 46 28 23 28 -Warning: Shuffle Join MERGEJOIN[90][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[89][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select t1.p_size, (select count(*) from part t2 where t2.p_partkey = t1.p_partkey group by t2.p_partkey), (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type @@ -5385,7 +5385,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[90][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[89][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select t1.p_size, (select count(*) from part t2 where t2.p_partkey = t1.p_partkey group by t2.p_partkey), (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index 839952f..b1209d9 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -1031,8 +1031,8 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: e011_03 - Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + alias: e011_01 + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] @@ -1042,15 +1042,15 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) - outputColumnNames: _col0, _col1 + expressions: c1 (type: decimal(15,2)) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(15,2)) sort order: + @@ -1060,9 +1060,8 @@ STAGE PLANS: keyColumnNums: [0] native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] - Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(15,2)) + valueColumnNums: [] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -1077,15 +1076,15 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: c1:decimal(15,2), c2:decimal(15,2) partitionColumnCount: 0 scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan - alias: e011_01 - Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + alias: e011_03 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:decimal(15,2), 1:c2:decimal(15,2), 2:ROW__ID:struct] @@ -1095,15 +1094,15 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:decimal(15,2)) predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: c1 (type: decimal(15,2)) - outputColumnNames: _col0 + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(15,2)) sort order: + @@ -1113,8 +1112,9 @@ STAGE PLANS: keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -1129,7 +1129,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: c1:decimal(15,2), c2:decimal(15,2) partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -1145,8 +1145,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_col2) - keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + aggregations: sum(_col0) + keys: _col1 (type: decimal(15,2)), _col2 (type: decimal(15,2)) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE @@ -1479,41 +1479,37 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 
Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumDouble - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: sum_window_0 (type: double) - outputColumnNames: _col0 + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1539,7 +1535,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01 POSTHOOK: Input: default@e011_03 #### A masked pattern was here #### -sum_window_0 +_c0 NULL NULL NULL @@ -2335,8 +2331,8 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: e011_03_small - Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + alias: e011_01_small + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] @@ -2346,15 +2342,15 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) - outputColumnNames: _col0, _col1 + expressions: c1 (type: decimal(7,2)) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator 
native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + @@ -2364,9 +2360,8 @@ STAGE PLANS: keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] - Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(7,2)) + valueColumnNums: [] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -2381,15 +2376,15 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: c1:decimal(7,2), c2:decimal(7,2) partitionColumnCount: 0 scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan - alias: e011_01_small - Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + alias: e011_03_small + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:decimal(7,2), 1:c2:decimal(7,2), 2:ROW__ID:struct] @@ -2399,15 +2394,15 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:decimal(7,2)) predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: c1 (type: decimal(7,2)) - outputColumnNames: _col0 + expressions: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + @@ -2417,8 +2412,9 @@ STAGE PLANS: keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + valueColumnNums: [1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -2433,7 +2429,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: c1:decimal(7,2), c2:decimal(7,2) partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -2449,8 +2445,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE Group By 
Operator - aggregations: sum(_col2) - keys: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) + aggregations: sum(_col0) + keys: _col1 (type: decimal(7,2)), _col2 (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE @@ -2783,41 +2779,37 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: decimal(7,2)), _col0 (type: decimal(7,2)), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(7,2), _col1: decimal(7,2), _col2: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumDouble - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: sum_window_0 (type: double) - outputColumnNames: _col0 + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2843,7 +2835,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01_small POSTHOOK: Input: default@e011_03_small #### A masked pattern was here #### -sum_window_0 +_c0 NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out index c0cb6c0..568549d 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out @@ -137,41 +137,37 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 50 Data size: 9408 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: string), _col2 
(type: double), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: int, _col2: double, _col3: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col1 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 50 Data size: 9408 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: int, _col1: string, _col2: double, _col3: bigint - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col0 - name: sum - window function: GenericUDAFSumLong - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Select Operator + expressions: _col1 (type: int), _col0 (type: string), (_col2 / _col3) (type: double), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 50 Data size: 9408 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), (_col2 / _col3) (type: double), sum_window_0 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 50 Data size: 9408 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 50 Data size: 9408 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out index 8e73eb0..c713303 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -3878,27 +3878,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int), _col3 (type: double), _col4 (type: double), _col5 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] native: true - projectedOutputColumnNums: [1, 0, 2, 3, 4, 5] + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez 
IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [1] + valueColumnNums: [2, 3, 4, 5] Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2, 3, 4, 5] - Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int), _col3 (type: double), _col4 (type: double), _col5 (type: double) + value expressions: _col2 (type: int), _col3 (type: double), _col4 (type: double), _col5 (type: double) Reducer 3 Execution mode: llap Reduce Vectorization: @@ -3908,7 +3900,7 @@ STAGE PLANS: vectorized: false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator @@ -3920,8 +3912,8 @@ STAGE PLANS: Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST - partition by: _col0 + order by: _col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST + partition by: _col1 raw input shape: window functions: window function definition @@ -3938,7 +3930,7 @@ STAGE PLANS: window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double), round(sum_window_0, 2) (type: double), _col4 (type: double), _col5 (type: double), round(avg_window_1, 2) (type: double) + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int), _col3 (type: double), round(sum_window_0, 2) (type: double), _col4 (type: double), _col5 (type: double), round(avg_window_1, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 13 Data size: 3419 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git ql/src/test/results/clientpositive/materialized_view_create_rewrite.q.out ql/src/test/results/clientpositive/materialized_view_create_rewrite.q.out index 4da3d09..18eb1d1 100644 --- ql/src/test/results/clientpositive/materialized_view_create_rewrite.q.out +++ ql/src/test/results/clientpositive/materialized_view_create_rewrite.q.out @@ -120,7 +120,7 @@ STAGE PLANS: alias: default.cmv_mat_view2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: a (type: int), c (type: 
decimal(10,2)) + expressions: 3 (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE ListSink @@ -266,7 +266,7 @@ POSTHOOK: query: alter materialized view cmv_mat_view2 enable rewrite POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE POSTHOOK: Input: default@cmv_mat_view2 POSTHOOK: Output: default@cmv_mat_view2 -Warning: Shuffle Join JOIN[4][tables = [default.cmv_mat_view2, cmv_basetable]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from ( (select a, c from cmv_basetable where a = 3) table1 @@ -292,17 +292,28 @@ STAGE PLANS: TableScan alias: default.cmv_mat_view2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: c (type: decimal(10,2)) + outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE - value expressions: a (type: int), c (type: decimal(10,2)) + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(10,2)) TableScan alias: cmv_basetable Statistics: Num rows: 5 Data size: 1205 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 1205 Basic stats: COMPLETE Column stats: NONE - value expressions: a (type: int), c (type: decimal(10,2)), d (type: int) + Filter Operator + predicate: ((a = 3) and (d = 3)) (type: boolean) + Statistics: Num rows: 1 Data size: 241 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: decimal(10,2)) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 241 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 241 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(10,2)) Reduce Operator Tree: Join Operator condition map: @@ -310,22 +321,19 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col5, _col7, _col8 - Statistics: Num rows: 10 Data size: 3580 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 = 3) and (_col8 = 3)) (type: boolean) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 3 (type: int), _col0 (type: decimal(10,2)), 3 (type: int), _col2 (type: decimal(10,2)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: decimal(10,2)), _col0 (type: int), _col7 (type: decimal(10,2)) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 716 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -333,7 +341,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[4][tables = [default.cmv_mat_view2, cmv_basetable]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from ( (select a, c from cmv_basetable where a = 3) table1 join diff --git ql/src/test/results/clientpositive/materialized_view_create_rewrite_multi_db.q.out ql/src/test/results/clientpositive/materialized_view_create_rewrite_multi_db.q.out index d7ee468..32b408f 100644 --- ql/src/test/results/clientpositive/materialized_view_create_rewrite_multi_db.q.out +++ ql/src/test/results/clientpositive/materialized_view_create_rewrite_multi_db.q.out @@ -156,7 +156,7 @@ STAGE PLANS: alias: db2.cmv_mat_view2 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: a (type: int), c (type: decimal(10,2)) + expressions: 3 (type: int), c (type: decimal(10,2)) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE ListSink diff --git ql/src/test/results/clientpositive/materialized_view_rewrite_1.q.out ql/src/test/results/clientpositive/materialized_view_rewrite_1.q.out new file mode 100644 index 0000000..75e828f --- /dev/null +++ ql/src/test/results/clientpositive/materialized_view_rewrite_1.q.out @@ -0,0 +1,1104 @@ +PREHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps +POSTHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps +PREHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@emps +POSTHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@emps +POSTHOOK: Lineage: emps.commission SCRIPT [] +POSTHOOK: Lineage: emps.deptno SCRIPT [] +POSTHOOK: Lineage: emps.empid SCRIPT [] +POSTHOOK: Lineage: emps.name SCRIPT [] +POSTHOOK: Lineage: emps.salary SCRIPT [] +PREHOOK: query: analyze table emps compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Output: default@emps +#### A masked pattern was here #### +POSTHOOK: query: analyze table emps compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Output: default@emps +#### A masked pattern was here #### +PREHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@depts +POSTHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@depts +PREHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@depts +POSTHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@depts +POSTHOOK: Lineage: depts.deptno SCRIPT [] +POSTHOOK: Lineage: depts.locationid SCRIPT [] +POSTHOOK: Lineage: depts.name SCRIPT [] +PREHOOK: query: analyze table depts compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Output: default@depts +#### A masked pattern was here #### +POSTHOOK: query: analyze table depts compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Output: default@depts +#### A masked pattern was here #### +PREHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dependents +POSTHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dependents +PREHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dependents +POSTHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dependents +POSTHOOK: Lineage: dependents.empid SCRIPT [] +POSTHOOK: Lineage: dependents.name SCRIPT [] +PREHOOK: query: analyze table dependents compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Output: default@dependents +#### A masked pattern was here #### +POSTHOOK: query: analyze table dependents compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Output: default@dependents +#### A masked pattern was here #### +PREHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@locations +POSTHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@locations +PREHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@locations +POSTHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@locations +POSTHOOK: Lineage: locations.locationid SCRIPT [] +POSTHOOK: Lineage: locations.name SCRIPT [] +PREHOOK: query: analyze table locations compute statistics for columns 
+PREHOOK: type: QUERY +PREHOOK: Input: default@locations +PREHOOK: Output: default@locations +#### A masked pattern was here #### +POSTHOOK: query: analyze table locations compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@locations +POSTHOOK: Output: default@locations +#### A masked pattern was here #### +PREHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: create materialized view mv1 enable rewrite as +select * from emps where empid < 150 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select * from emps where empid < 150 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select * +from (select * from emps where empid < 120) t +join depts using (deptno) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from (select * from emps where empid < 120) t +join depts using (deptno) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: 
COMPLETE + Filter Operator + predicate: ((empid < 120) and deptno is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), deptno (type: int), name (type: varchar(256)), salary (type: float), commission (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: varchar(256)), _col3 (type: float), _col4 (type: int) + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), name (type: varchar(256)), locationid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: varchar(256)), _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 3 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: int), _col0 (type: int), _col2 (type: varchar(256)), _col3 (type: float), _col4 (type: int), _col6 (type: varchar(256)), _col7 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 3 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from (select * from emps where empid < 120) t +join depts using (deptno) +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select * +from (select * from emps where empid < 120) t +join depts using (deptno) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +10 100 Bill 10000.0 1000 Sales 10 +10 110 Bill 10000.0 250 Sales 10 +10 110 Theodore 10000.0 250 Sales 10 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select deptno, name, salary, commission +from emps +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: 
database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select deptno, name, salary, commission +from emps +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select emps.name, emps.salary, emps.commission +from emps +join depts using (deptno) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select emps.name, emps.salary, emps.commission +from emps +join depts using (deptno) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptno is not null (type: boolean) + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), name (type: varchar(256)), salary (type: float), commission (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: varchar(256)), _col2 (type: float), _col3 (type: int) + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: varchar(256)), _col2 (type: float), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 490 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select emps.name, emps.salary, emps.commission +from emps +join depts using (deptno) +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern 
was here #### +POSTHOOK: query: select emps.name, emps.salary, emps.commission +from emps +join depts using (deptno) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +Bill 10000.0 1000 +Bill 10000.0 250 +Eric 8000.0 500 +Sebastian 7000.0 NULL +Theodore 10000.0 250 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select empid deptno from emps +join depts using (deptno) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select empid deptno from emps +join depts using (deptno) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select empid deptno from emps +join depts using (deptno) where empid = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select empid deptno from emps +join depts using (deptno) where empid = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno = 1) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select empid deptno from emps +join depts using (deptno) where empid = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select empid deptno from emps +join depts using (deptno) where empid = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW 
+POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select * from emps where empid < 200 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select * from emps where empid < 200 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select * from emps where empid > 120 +union all select * from emps where empid < 150 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from emps where empid > 120 +union all select * from emps where empid < 150 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emps + Statistics: Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (empid > 120) (type: boolean) + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), deptno (type: int), name (type: varchar(256)), salary (type: float), commission (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (empid < 150) (type: boolean) + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), deptno (type: int), name (type: varchar(256)), salary (type: float), commission (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from emps where empid > 120 +union all select * from emps where empid < 150 +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 
+#### A masked pattern was here #### +POSTHOOK: query: select * from emps where empid > 120 +union all select * from emps where empid < 150 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +100 10 Bill 10000.0 1000 +110 10 Bill 10000.0 250 +110 10 Theodore 10000.0 250 +150 10 Sebastian 7000.0 NULL +200 20 Eric 8000.0 500 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select empid, deptno from emps group by empid, deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select empid, deptno from emps group by empid, deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select empid, deptno from emps group by empid, deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select empid, deptno from emps group by empid, deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emps + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), deptno (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select empid, deptno from emps group by empid, deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select empid, deptno from emps group by empid, deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +100 10 +110 10 +110 10 +150 10 +200 20 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select empid, name from emps group by empid, name +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select empid, name from emps group by empid, name +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here 
#### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select empid, name from emps group by empid, name +PREHOOK: type: QUERY +POSTHOOK: query: explain +select empid, name from emps group by empid, name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emps + Statistics: Num rows: 5 Data size: 470 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 470 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select empid, name from emps group by empid, name +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select empid, name from emps group by empid, name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +100 Bill +110 Bill +110 Theodore +150 Sebastian +200 Eric +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select name, salary from emps group by name, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select name, salary from emps group by name, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select name, salary from emps group by name, salary +PREHOOK: type: QUERY +POSTHOOK: query: explain +select name, salary from emps group by name, salary +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)), salary (type: float) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select name, salary from emps group by name, salary +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select name, salary from emps group by name, salary +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +Bill 10000.0 +Eric 8000.0 +Sebastian 7000.0 +Theodore 10000.0 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: 
DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select name, salary from emps group by name, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select name, salary from emps group by name, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select name from emps group by name +PREHOOK: type: QUERY +POSTHOOK: query: explain +select name from emps group by name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: name + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: name (type: varchar(256)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select name from emps group by name +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select name from emps group by name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +Bill +Eric +Sebastian +Theodore +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select name, salary from emps where deptno = 10 group 
by name, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select name, salary from emps where deptno = 10 group by name, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select name from emps where deptno = 10 group by name +PREHOOK: type: QUERY +POSTHOOK: query: explain +select name from emps where deptno = 10 group by name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: name + Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: name (type: varchar(256)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select name from emps where deptno = 10 group by name +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select name from emps where deptno = 10 group by name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +Bill +Sebastian +Theodore +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select name, salary, count(*) as c, sum(empid) as s +from emps group by name, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select name, 
salary, count(*) as c, sum(empid) as s +from emps group by name, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select name from emps group by name +PREHOOK: type: QUERY +POSTHOOK: query: explain +select name from emps group by name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: name + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: name (type: varchar(256)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select name from emps group by name +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select name from emps group by name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +Bill +Eric +Sebastian +Theodore +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 diff --git ql/src/test/results/clientpositive/materialized_view_rewrite_2.q.out ql/src/test/results/clientpositive/materialized_view_rewrite_2.q.out new file mode 100644 index 0000000..6e8b2e3 --- /dev/null +++ ql/src/test/results/clientpositive/materialized_view_rewrite_2.q.out @@ -0,0 +1,796 @@ +PREHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps +POSTHOOK: query: create table emps ( + empid int, + deptno int, + name 
varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps +PREHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@emps +POSTHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@emps +POSTHOOK: Lineage: emps.commission SCRIPT [] +POSTHOOK: Lineage: emps.deptno SCRIPT [] +POSTHOOK: Lineage: emps.empid SCRIPT [] +POSTHOOK: Lineage: emps.name SCRIPT [] +POSTHOOK: Lineage: emps.salary SCRIPT [] +PREHOOK: query: analyze table emps compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Output: default@emps +#### A masked pattern was here #### +POSTHOOK: query: analyze table emps compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Output: default@emps +#### A masked pattern was here #### +PREHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@depts +POSTHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@depts +PREHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@depts +POSTHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@depts +POSTHOOK: Lineage: depts.deptno SCRIPT [] +POSTHOOK: Lineage: depts.locationid SCRIPT [] +POSTHOOK: Lineage: depts.name SCRIPT [] +PREHOOK: query: analyze table depts compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Output: default@depts +#### A masked pattern was here #### +POSTHOOK: query: analyze table depts compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Output: default@depts +#### A masked pattern was here #### +PREHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dependents +POSTHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dependents +PREHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dependents +POSTHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +POSTHOOK: type: 
QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dependents +POSTHOOK: Lineage: dependents.empid SCRIPT [] +POSTHOOK: Lineage: dependents.name SCRIPT [] +PREHOOK: query: analyze table dependents compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Output: default@dependents +#### A masked pattern was here #### +POSTHOOK: query: analyze table dependents compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Output: default@dependents +#### A masked pattern was here #### +PREHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@locations +POSTHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@locations +PREHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@locations +POSTHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@locations +POSTHOOK: Lineage: locations.locationid SCRIPT [] +POSTHOOK: Lineage: locations.name SCRIPT [] +PREHOOK: query: analyze table locations compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@locations +PREHOOK: Output: default@locations +#### A masked pattern was here #### +POSTHOOK: query: analyze table locations compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@locations +POSTHOOK: Output: default@locations +#### A masked pattern was here #### +PREHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) 
disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: create materialized view mv1 enable rewrite as +select empid, depts.deptno from emps +join depts using (deptno) where depts.deptno > 10 +group by empid, depts.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select empid, depts.deptno from emps +join depts using (deptno) where depts.deptno > 10 +group by empid, depts.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno > 20) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, empid from depts 
+join emps using (deptno) where depts.deptno > 10 +group by empid, depts.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, empid from depts +join emps using (deptno) where depts.deptno > 10 +group by empid, depts.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno > 20) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select empid, depts.deptno from emps +join depts using (deptno) where emps.deptno > 10 +group by empid, depts.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select empid, depts.deptno from 
emps +join depts using (deptno) where emps.deptno > 10 +group by empid, depts.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno > 20) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select empid from emps +join depts using (deptno) where depts.deptno > 20 +group by empid, depts.deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, emps.empid from depts +join emps using (deptno) where emps.empid > 10 +group by depts.deptno, emps.empid +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, emps.empid from depts +join emps using (deptno) where emps.empid > 10 +group by depts.deptno, emps.empid +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns 
+PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select depts.deptno from depts +join emps using (deptno) where emps.empid > 15 +group by depts.deptno, emps.empid +PREHOOK: type: QUERY +POSTHOOK: query: explain +select depts.deptno from depts +join emps using (deptno) where emps.empid > 15 +group by depts.deptno, emps.empid +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (empid > 15) (type: boolean) + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select depts.deptno from depts +join emps using (deptno) where emps.empid > 15 +group by depts.deptno, emps.empid +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select depts.deptno from depts +join emps using (deptno) where emps.empid > 15 +group by depts.deptno, emps.empid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +10 +10 +10 +10 +20 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, emps.empid from depts +join emps using (deptno) where emps.empid > 10 +group by depts.deptno, emps.empid +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, emps.empid from depts +join emps using (deptno) where emps.empid > 10 +group by depts.deptno, emps.empid +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: 
default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select depts.deptno from depts +join emps using (deptno) where emps.empid > 15 +group by depts.deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select depts.deptno from depts +join emps using (deptno) where emps.empid > 15 +group by depts.deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (empid > 15) (type: boolean) + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select depts.deptno from depts +join emps using (deptno) where emps.empid > 15 +group by depts.deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select depts.deptno from depts +join emps using (deptno) where emps.empid > 15 +group by depts.deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +10 +20 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: create materialized view mv1 enable rewrite as +select depts.name, dependents.name as name2, emps.deptno, depts.deptno as deptno2, dependents.empid +from depts, dependents, emps +where depts.deptno > 10 +group by depts.name, dependents.name, emps.deptno, depts.deptno, dependents.empid +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select depts.name, dependents.name as name2, emps.deptno, depts.deptno as deptno2, 
dependents.empid +from depts, dependents, emps +where depts.deptno > 10 +group by depts.name, dependents.name, emps.deptno, depts.deptno, dependents.empid +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 +group by dependents.empid +PREHOOK: type: QUERY +POSTHOOK: query: explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 +group by dependents.empid +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 8 Data size: 1536 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((deptno = deptno2) and (name = name2)) (type: boolean) + Statistics: Num rows: 2 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 +group by dependents.empid +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 +group by dependents.empid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A 
masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 diff --git ql/src/test/results/clientpositive/materialized_view_rewrite_3.q.out ql/src/test/results/clientpositive/materialized_view_rewrite_3.q.out new file mode 100644 index 0000000..e3bd233 --- /dev/null +++ ql/src/test/results/clientpositive/materialized_view_rewrite_3.q.out @@ -0,0 +1,576 @@ +PREHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps +POSTHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps +PREHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@emps +POSTHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@emps +POSTHOOK: Lineage: emps.commission SCRIPT [] +POSTHOOK: Lineage: emps.deptno SCRIPT [] +POSTHOOK: Lineage: emps.empid SCRIPT [] +POSTHOOK: Lineage: emps.name SCRIPT [] +POSTHOOK: Lineage: emps.salary SCRIPT [] +PREHOOK: query: analyze table emps compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Output: default@emps +#### A masked pattern was here #### +POSTHOOK: query: analyze table emps compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Output: default@emps +#### A masked pattern was here #### +PREHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@depts +POSTHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@depts +PREHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@depts +POSTHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@depts +POSTHOOK: Lineage: depts.deptno SCRIPT [] +POSTHOOK: Lineage: depts.locationid SCRIPT [] +POSTHOOK: Lineage: depts.name SCRIPT [] +PREHOOK: query: analyze table depts compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Output: default@depts +#### A masked pattern was here #### +POSTHOOK: query: analyze table depts compute 
statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Output: default@depts +#### A masked pattern was here #### +PREHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dependents +POSTHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dependents +PREHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dependents +POSTHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dependents +POSTHOOK: Lineage: dependents.empid SCRIPT [] +POSTHOOK: Lineage: dependents.name SCRIPT [] +PREHOOK: query: analyze table dependents compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Output: default@dependents +#### A masked pattern was here #### +POSTHOOK: query: analyze table dependents compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Output: default@dependents +#### A masked pattern was here #### +PREHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@locations +POSTHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@locations +PREHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@locations +POSTHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@locations +POSTHOOK: Lineage: locations.locationid SCRIPT [] +POSTHOOK: Lineage: locations.name SCRIPT [] +PREHOOK: query: analyze table locations compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@locations +PREHOOK: Output: default@locations +#### A masked pattern was here #### +POSTHOOK: query: analyze table locations compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@locations +POSTHOOK: Output: default@locations +#### A masked pattern was here #### +PREHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT 
+POSTHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: create materialized view mv1 enable rewrite as +select empid deptno from emps +join depts using (deptno) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select empid deptno from emps +join depts using (deptno) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select empid deptno from emps +join depts using (deptno) where empid = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select empid deptno from emps +join depts using (deptno) where empid = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno = 1) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select empid deptno from emps +join depts using (deptno) where empid = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### 
+POSTHOOK: query: select empid deptno from emps +join depts using (deptno) where empid = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select cast(empid as BIGINT) from emps +join depts using (deptno) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select cast(empid as BIGINT) from emps +join depts using (deptno) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select empid deptno from emps +join depts using (deptno) where empid > 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select empid deptno from emps +join depts using (deptno) where empid > 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToInteger(_c0) > 1) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_c0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select empid deptno from emps +join depts using (deptno) where empid > 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select empid deptno from emps +join depts using (deptno) where empid > 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +100 +110 +150 +200 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 
+PREHOOK: query: create materialized view mv1 enable rewrite as +select cast(empid as BIGINT) from emps +join depts using (deptno) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select cast(empid as BIGINT) from emps +join depts using (deptno) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select empid deptno from emps +join depts using (deptno) where empid = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select empid deptno from emps +join depts using (deptno) where empid = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToInteger(_c0) = 1) (type: boolean) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select empid deptno from emps +join depts using (deptno) where empid = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select empid deptno from emps +join depts using (deptno) where empid = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select depts.name +from emps +join depts on (emps.deptno = depts.deptno) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select depts.name +from emps +join depts on (emps.deptno = depts.deptno) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 4 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 4 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: dependents + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: varchar(256)) + 1 _col1 (type: varchar(256)) + outputColumnNames: _col1 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select dependents.empid 
+from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 diff --git ql/src/test/results/clientpositive/materialized_view_rewrite_4.q.out ql/src/test/results/clientpositive/materialized_view_rewrite_4.q.out new file mode 100644 index 0000000..7301571 --- /dev/null +++ ql/src/test/results/clientpositive/materialized_view_rewrite_4.q.out @@ -0,0 +1,944 @@ +PREHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps +POSTHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps +PREHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@emps +POSTHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@emps +POSTHOOK: Lineage: emps.commission SCRIPT [] +POSTHOOK: Lineage: emps.deptno SCRIPT [] +POSTHOOK: Lineage: emps.empid SCRIPT [] +POSTHOOK: Lineage: emps.name SCRIPT [] +POSTHOOK: Lineage: emps.salary SCRIPT [] +PREHOOK: query: analyze table emps compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Output: default@emps +#### A masked pattern was here #### +POSTHOOK: query: analyze table emps compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Output: default@emps +#### A masked pattern was here #### +PREHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@depts +POSTHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@depts +PREHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@depts +POSTHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@depts +POSTHOOK: Lineage: depts.deptno SCRIPT 
[] +POSTHOOK: Lineage: depts.locationid SCRIPT [] +POSTHOOK: Lineage: depts.name SCRIPT [] +PREHOOK: query: analyze table depts compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Output: default@depts +#### A masked pattern was here #### +POSTHOOK: query: analyze table depts compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Output: default@depts +#### A masked pattern was here #### +PREHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dependents +POSTHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dependents +PREHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dependents +POSTHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dependents +POSTHOOK: Lineage: dependents.empid SCRIPT [] +POSTHOOK: Lineage: dependents.name SCRIPT [] +PREHOOK: query: analyze table dependents compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Output: default@dependents +#### A masked pattern was here #### +POSTHOOK: query: analyze table dependents compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Output: default@dependents +#### A masked pattern was here #### +PREHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@locations +POSTHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@locations +PREHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@locations +POSTHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@locations +POSTHOOK: Lineage: locations.locationid SCRIPT [] +POSTHOOK: Lineage: locations.name SCRIPT [] +PREHOOK: query: analyze table locations compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@locations +PREHOOK: Output: default@locations +#### A masked pattern was here #### +POSTHOOK: query: analyze table locations compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@locations +POSTHOOK: Output: default@locations +#### A masked pattern was here #### +PREHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely 
+PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: create materialized view mv1 enable rewrite as +select name, salary, count(*) as c, sum(empid) as s +from emps group by name, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select name, salary, count(*) as c, sum(empid) as s +from emps group by name, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select name, count(*) as c, sum(empid) as s +from emps group by name +PREHOOK: type: QUERY +POSTHOOK: query: explain +select name, count(*) as c, sum(empid) as s +from emps group by name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)), c (type: bigint), s (type: bigint) + outputColumnNames: name, c, s + Statistics: Num rows: 4 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0(c), sum(s) + keys: name (type: varchar(256)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 212 Basic stats: 
COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select name, count(*) as c, sum(empid) as s +from emps group by name +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select name, count(*) as c, sum(empid) as s +from emps group by name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +Bill 2 210 +Eric 1 200 +Sebastian 1 150 +Theodore 1 110 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select name, salary, count(*) as c, sum(empid) as s +from emps group by name, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select name, salary, count(*) as c, sum(empid) as s +from emps group by name, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select salary, name, sum(empid) as s, count(*) as c +from emps group by name, salary +PREHOOK: type: QUERY +POSTHOOK: query: explain +select salary, name, sum(empid) as s, count(*) as c +from emps group by name, salary +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: salary (type: float), name (type: varchar(256)), s (type: bigint), c (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select salary, name, sum(empid) as s, count(*) as c +from emps group by name, salary +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select salary, name, sum(empid) as 
s, count(*) as c +from emps group by name, salary +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +10000.0 Bill 210 2 +10000.0 Theodore 110 1 +7000.0 Sebastian 150 1 +8000.0 Eric 200 1 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select empid, emps.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by empid, emps.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select empid, emps.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by empid, emps.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select depts.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by depts.deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select depts.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by depts.deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), c (type: bigint), s (type: bigint) + outputColumnNames: deptno, c, s + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0(c), sum(s) + keys: deptno (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select depts.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by depts.deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select depts.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by depts.deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +10 4 470 +20 1 200 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select empid, emps.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +where emps.deptno >= 10 group by empid, emps.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select empid, emps.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +where emps.deptno >= 10 group by empid, emps.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select depts.deptno, sum(empid) as s +from emps join depts using (deptno) +where emps.deptno > 10 group by depts.deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select depts.deptno, sum(empid) as s +from emps join depts using (deptno) +where emps.deptno > 10 group by depts.deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno > 10) (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(s) + keys: deptno (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator 
+ compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select depts.deptno, sum(empid) as s +from emps join depts using (deptno) +where emps.deptno > 10 group by depts.deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select depts.deptno, sum(empid) as s +from emps join depts using (deptno) +where emps.deptno > 10 group by depts.deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +20 200 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select empid, depts.deptno, count(*) + 1 as c, sum(empid) as s +from emps join depts using (deptno) +where depts.deptno >= 10 group by empid, depts.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select empid, depts.deptno, count(*) + 1 as c, sum(empid) as s +from emps join depts using (deptno) +where depts.deptno >= 10 group by empid, depts.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select depts.deptno, sum(empid) + 1 as s +from emps join depts using (deptno) +where depts.deptno > 10 group by depts.deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select depts.deptno, sum(empid) + 1 as s +from emps join depts using (deptno) +where depts.deptno > 10 group by depts.deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno > 10) (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), s (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), (_col1 + 1L) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select depts.deptno, sum(empid) + 1 as s +from emps join depts using (deptno) +where depts.deptno > 10 group by depts.deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select depts.deptno, sum(empid) + 1 as s +from emps join depts using (deptno) +where depts.deptno > 10 group by depts.deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +20 201 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select depts.name, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +group by depts.name +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select depts.name, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +group by depts.name +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select dependents.empid, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (depts.name = dependents.name) +group by dependents.empid +PREHOOK: type: QUERY +POSTHOOK: query: explain +select dependents.empid, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (depts.name = dependents.name) +group by dependents.empid +POSTHOOK: type: QUERY +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)), s (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + TableScan + alias: dependents + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: varchar(256)) + 1 _col1 (type: varchar(256)) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select dependents.empid, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (depts.name = dependents.name) +group by 
dependents.empid +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select dependents.empid, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (depts.name = dependents.name) +group by dependents.empid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select dependents.empid, emps.deptno, count(distinct salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select dependents.empid, emps.deptno, count(distinct salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select emps.deptno, count(distinct salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select emps.deptno, count(distinct salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), s (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select emps.deptno, count(distinct salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select emps.deptno, count(distinct salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here 
#### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 diff --git ql/src/test/results/clientpositive/materialized_view_rewrite_5.q.out ql/src/test/results/clientpositive/materialized_view_rewrite_5.q.out new file mode 100644 index 0000000..bab3cac --- /dev/null +++ ql/src/test/results/clientpositive/materialized_view_rewrite_5.q.out @@ -0,0 +1,1522 @@ +PREHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps +POSTHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps +PREHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@emps +POSTHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@emps +POSTHOOK: Lineage: emps.commission SCRIPT [] +POSTHOOK: Lineage: emps.deptno SCRIPT [] +POSTHOOK: Lineage: emps.empid SCRIPT [] +POSTHOOK: Lineage: emps.name SCRIPT [] +POSTHOOK: Lineage: emps.salary SCRIPT [] +PREHOOK: query: analyze table emps compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Output: default@emps +#### A masked pattern was here #### +POSTHOOK: query: analyze table emps compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Output: default@emps +#### A masked pattern was here #### +PREHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@depts +POSTHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@depts +PREHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@depts +POSTHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@depts +POSTHOOK: Lineage: depts.deptno SCRIPT [] +POSTHOOK: Lineage: depts.locationid SCRIPT [] +POSTHOOK: Lineage: depts.name SCRIPT [] +PREHOOK: query: analyze table depts compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Output: default@depts +#### A masked pattern was here #### +POSTHOOK: 
query: analyze table depts compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Output: default@depts +#### A masked pattern was here #### +PREHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dependents +POSTHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dependents +PREHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dependents +POSTHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dependents +POSTHOOK: Lineage: dependents.empid SCRIPT [] +POSTHOOK: Lineage: dependents.name SCRIPT [] +PREHOOK: query: analyze table dependents compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Output: default@dependents +#### A masked pattern was here #### +POSTHOOK: query: analyze table dependents compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Output: default@dependents +#### A masked pattern was here #### +PREHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@locations +POSTHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@locations +PREHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@locations +POSTHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@locations +POSTHOOK: Lineage: locations.locationid SCRIPT [] +POSTHOOK: Lineage: locations.name SCRIPT [] +PREHOOK: query: analyze table locations compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@locations +PREHOOK: Output: default@locations +#### A masked pattern was here #### +POSTHOOK: query: analyze table locations compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@locations +POSTHOOK: Output: default@locations +#### A masked pattern was here #### +PREHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely 
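Note: the ALTER TABLE ... DISABLE NOVALIDATE RELY statements in this setup declare purely informational constraints. Hive neither enforces nor validates them, but the RELY flag lets the optimizer trust them when matching queries against the materialized views later in this file, for example answering a query over emps alone from a view that joins emps to depts, which is only safe because the foreign key and NOT NULL constraints guarantee the join preserves every emps row. A hedged sketch of the general pattern follows, using hypothetical table names t and parent:

-- Informational (unenforced) constraints: DISABLE skips enforcement,
-- NOVALIDATE skips checking existing rows, and RELY tells the optimizer
-- it may assume the constraint holds when rewriting against materialized views.
alter table t add constraint t_pk primary key (id) disable novalidate rely;
alter table t add constraint t_fk foreign key (parent_id)
  references parent(id) disable novalidate rely;
alter table t change column parent_id parent_id int
  constraint t_nn not null disable novalidate rely;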
+PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps change column deptno deptno int constraint nn1 not null disable novalidate rely +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@emps +PREHOOK: Output: default@emps +POSTHOOK: query: alter table emps change column deptno deptno int constraint nn1 not null disable novalidate rely +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@emps +POSTHOOK: Output: default@emps +PREHOOK: query: alter table depts change column locationid locationid int constraint nn2 not null disable novalidate rely +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@depts +PREHOOK: Output: default@depts +POSTHOOK: query: alter table depts change column locationid locationid int constraint nn2 not null disable novalidate rely +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@depts +POSTHOOK: Output: default@depts +PREHOOK: query: create materialized view mv1 enable rewrite as +select name, deptno, salary from emps where deptno > 15 group by name, deptno, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select name, deptno, salary from emps where deptno > 15 group by name, deptno, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select name from emps where deptno >= 20 group by name +PREHOOK: type: QUERY +POSTHOOK: query: explain +select name from emps where deptno >= 20 group by name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno >= 20) (type: boolean) + Statistics: 
Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: name + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: name (type: varchar(256)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select name from emps where deptno >= 20 group by name +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select name from emps where deptno >= 20 group by name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +Eric +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select name, deptno, salary, count(*) as c, sum(empid) as s +from emps where deptno >= 15 group by name, deptno, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select name, deptno, salary, count(*) as c, sum(empid) as s +from emps where deptno >= 15 group by name, deptno, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select name, sum(empid) as s +from emps where deptno > 15 group by name +PREHOOK: type: QUERY +POSTHOOK: query: explain +select name, sum(empid) as s +from emps where deptno > 15 group by name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno > 15) (type: boolean) + 
Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)), s (type: bigint) + outputColumnNames: name, s + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(s) + keys: name (type: varchar(256)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select name, sum(empid) as s +from emps where deptno > 15 group by name +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select name, sum(empid) as s +from emps where deptno > 15 group by name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +Eric 200 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 and depts.deptno < 20 +group by depts.deptno, dependents.empid +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@locations +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 and depts.deptno < 20 +group by depts.deptno, dependents.empid +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@locations +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 
compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 11 and depts.deptno < 19 +group by dependents.empid +PREHOOK: type: QUERY +POSTHOOK: query: explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 11 and depts.deptno < 19 +group by dependents.empid +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((deptno < 19) and (deptno > 11)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 11 and depts.deptno < 19 +group by dependents.empid +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@locations +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 11 and depts.deptno < 19 +group by dependents.empid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@locations +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 
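Note: the EXPLAIN output above never spells out the rewritten statement as SQL, but the operator tree over default.mv1 corresponds roughly to the query sketched below (an illustration, not part of the test output). Because the query's range deptno > 11 and deptno < 19 is contained in the view's range deptno > 10 and deptno < 20, only a residual filter and a re-aggregation on empid remain on top of the materialized view scan.

-- approximate SQL equivalent of the rewritten plan shown above
select empid
from mv1
where deptno > 11 and deptno < 19
group by empid;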
+POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select empid, depts.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by empid, depts.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select empid, depts.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by empid, depts.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select deptno from emps group by deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select deptno from emps group by deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select deptno from emps group by deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select deptno from emps group by deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +10 +20 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select empid, depts.deptno, 
count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by empid, depts.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select empid, depts.deptno, count(*) as c, sum(empid) as s +from emps join depts using (deptno) +group by empid, depts.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select deptno, empid, sum(empid) as s, count(*) as c +from emps group by empid, deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select deptno, empid, sum(empid) as s, count(*) as c +from emps group by empid, deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), empid (type: int), s (type: bigint), c (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select deptno, empid, sum(empid) as s, count(*) as c +from emps group by empid, deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select deptno, empid, sum(empid) as s, count(*) as c +from emps group by empid, deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +10 100 100 1 +10 110 220 2 +10 150 150 1 +20 200 200 1 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select dependents.empid, emps.deptno, sum(salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select dependents.empid, emps.deptno, sum(salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: 
query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select dependents.empid, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +group by dependents.empid +PREHOOK: type: QUERY +POSTHOOK: query: explain +select dependents.empid, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +group by dependents.empid +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), deptno (type: int), s (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: double) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col3) + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select dependents.empid, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +group by dependents.empid +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select dependents.empid, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +group by dependents.empid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select dependents.empid, emps.deptno, sum(salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select dependents.empid, emps.deptno, sum(salary) as s +from emps +join dependents on (emps.empid = dependents.empid) +group by dependents.empid, emps.deptno +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select depts.name, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +group by depts.name +PREHOOK: type: QUERY +POSTHOOK: query: explain +select depts.name, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +group by depts.name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: varchar(256)) + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), s (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col3) + keys: _col1 (type: varchar(256)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select depts.name, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +group by depts.name +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select depts.name, sum(salary) as s +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +group by depts.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: create materialized view mv1 enable rewrite as +select a.empid deptno from +(select * from emps where empid = 1) a +join depts on (a.deptno = depts.deptno) +join dependents on (a.empid = 
dependents.empid) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select a.empid deptno from +(select * from emps where empid = 1) a +join depts on (a.deptno = depts.deptno) +join dependents on (a.empid = dependents.empid) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select a.empid from +(select * from emps where empid = 1) a +join dependents on (a.empid = dependents.empid) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.empid from +(select * from emps where empid = 1) a +join dependents on (a.empid = dependents.empid) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + ListSink + +PREHOOK: query: select a.empid from +(select * from emps where empid = 1) a +join dependents on (a.empid = dependents.empid) +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select a.empid from +(select * from emps where empid = 1) a +join dependents on (a.empid = dependents.empid) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: create materialized view mv1 enable rewrite as +select a.empid, a.deptno from +(select * from emps where empid = 1) a +join depts on (a.deptno = depts.deptno) +join dependents on (a.empid = dependents.empid) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select a.empid, a.deptno from +(select * from emps where empid = 1) a +join depts on (a.deptno = depts.deptno) +join dependents on (a.empid = dependents.empid) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select a.empid from +(select * from emps where empid = 1) a +join dependents on (a.empid = dependents.empid) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.empid from +(select * from emps where empid = 1) a +join dependents on (a.empid = dependents.empid) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + ListSink + +PREHOOK: query: select a.empid from +(select * from emps where empid = 1) a +join dependents on (a.empid = dependents.empid) +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select a.empid from +(select * from emps where empid = 1) a +join dependents on (a.empid = dependents.empid) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select empid deptno from +(select * from emps where empid = 1) a +join depts on (a.deptno = depts.deptno) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select empid deptno from +(select * from emps where empid = 1) a +join depts on (a.deptno = depts.deptno) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select empid from emps where empid = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select empid from emps where empid = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num 
rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + ListSink + +PREHOOK: query: select empid from emps where empid = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select empid from emps where empid = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: create materialized view mv1 enable rewrite as +select emps.empid, emps.deptno from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select emps.empid, emps.deptno from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + ListSink + +PREHOOK: query: select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: 
DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +Warning: Shuffle Join JOIN[16][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_0]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: create materialized view mv1 enable rewrite as +select emps.empid, emps.deptno from emps +join depts a on (emps.deptno=a.deptno) +join depts b on (emps.deptno=b.deptno) +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select emps.empid, emps.deptno from emps +join depts a on (emps.deptno=a.deptno) +join depts b on (emps.deptno=b.deptno) +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + ListSink + +PREHOOK: query: select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.empid = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select emps.empid, emps.deptno from emps +join depts a on (emps.deptno=a.deptno) +join depts b on (emps.deptno=b.deptno) +join dependents on (emps.empid = dependents.empid) +where emps.name = 'Sebastian' +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: 
query: create materialized view mv1 enable rewrite as +select emps.empid, emps.deptno from emps +join depts a on (emps.deptno=a.deptno) +join depts b on (emps.deptno=b.deptno) +join dependents on (emps.empid = dependents.empid) +where emps.name = 'Sebastian' +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.name = 'Sebastian' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.name = 'Sebastian' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.name = 'Sebastian' +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select emps.empid from emps +join dependents on (emps.empid = dependents.empid) +where emps.name = 'Sebastian' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 diff --git ql/src/test/results/clientpositive/materialized_view_rewrite_6.q.out ql/src/test/results/clientpositive/materialized_view_rewrite_6.q.out new file mode 100644 index 0000000..017d793 --- /dev/null +++ ql/src/test/results/clientpositive/materialized_view_rewrite_6.q.out @@ -0,0 +1,830 @@ +PREHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps +POSTHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps +PREHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table 
+PREHOOK: Output: default@emps +POSTHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@emps +POSTHOOK: Lineage: emps.commission SCRIPT [] +POSTHOOK: Lineage: emps.deptno SCRIPT [] +POSTHOOK: Lineage: emps.empid SCRIPT [] +POSTHOOK: Lineage: emps.name SCRIPT [] +POSTHOOK: Lineage: emps.salary SCRIPT [] +PREHOOK: query: analyze table emps compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Output: default@emps +#### A masked pattern was here #### +POSTHOOK: query: analyze table emps compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Output: default@emps +#### A masked pattern was here #### +PREHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@depts +POSTHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@depts +PREHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@depts +POSTHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@depts +POSTHOOK: Lineage: depts.deptno SCRIPT [] +POSTHOOK: Lineage: depts.locationid SCRIPT [] +POSTHOOK: Lineage: depts.name SCRIPT [] +PREHOOK: query: analyze table depts compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Output: default@depts +#### A masked pattern was here #### +POSTHOOK: query: analyze table depts compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Output: default@depts +#### A masked pattern was here #### +PREHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dependents +POSTHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dependents +PREHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dependents +POSTHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dependents +POSTHOOK: Lineage: dependents.empid SCRIPT [] +POSTHOOK: Lineage: dependents.name SCRIPT [] +PREHOOK: query: analyze table dependents compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Output: default@dependents +#### A masked pattern was here #### +POSTHOOK: query: analyze table dependents compute statistics for columns +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Output: default@dependents +#### A masked pattern was here #### +PREHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@locations +POSTHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@locations +PREHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@locations +POSTHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@locations +POSTHOOK: Lineage: locations.locationid SCRIPT [] +POSTHOOK: Lineage: locations.name SCRIPT [] +PREHOOK: query: analyze table locations compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@locations +PREHOOK: Output: default@locations +#### A masked pattern was here #### +POSTHOOK: query: analyze table locations compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@locations +POSTHOOK: Output: default@locations +#### A masked pattern was here #### +PREHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps change column deptno deptno int constraint nn1 not null disable novalidate rely +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@emps +PREHOOK: Output: default@emps +POSTHOOK: query: 
alter table emps change column deptno deptno int constraint nn1 not null disable novalidate rely +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@emps +POSTHOOK: Output: default@emps +PREHOOK: query: alter table depts change column locationid locationid int constraint nn2 not null disable novalidate rely +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@depts +PREHOOK: Output: default@depts +POSTHOOK: query: alter table depts change column locationid locationid int constraint nn2 not null disable novalidate rely +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@depts +POSTHOOK: Output: default@depts +PREHOOK: query: create materialized view mv1 enable rewrite as +select name, deptno, salary, count(*) + 1 as c, sum(empid) as s +from emps where deptno >= 10 group by name, deptno, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select name, deptno, salary, count(*) + 1 as c, sum(empid) as s +from emps where deptno >= 10 group by name, deptno, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select salary, sum(empid) + 1 as s +from emps where deptno > 10 group by salary +PREHOOK: type: QUERY +POSTHOOK: query: explain +select salary, sum(empid) + 1 as s +from emps where deptno > 10 group by salary +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno > 10) (type: boolean) + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: salary (type: float), s (type: bigint) + outputColumnNames: salary, s + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(s) + keys: salary (type: float) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: float) + sort order: + + Map-reduce partition columns: _col0 (type: float) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: float) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: float), (_col1 + 1L) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select salary, sum(empid) + 1 as s +from emps where deptno > 10 group by salary +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select salary, sum(empid) + 1 as s +from emps where deptno > 10 group by salary +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +8000.0 201 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select name, deptno, salary, count(*) + 1 as c, sum(empid) as s +from emps where deptno >= 15 group by name, deptno, salary +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select name, deptno, salary, count(*) + 1 as c, sum(empid) as s +from emps where deptno >= 15 group by name, deptno, salary +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select salary + 1, sum(empid) + 1 as s +from emps where deptno > 15 group by salary +PREHOOK: type: QUERY +POSTHOOK: query: explain +select salary + 1, sum(empid) + 1 as s +from emps where deptno > 15 group by salary +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno > 15) (type: boolean) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: salary (type: float), s (type: bigint) + outputColumnNames: salary, s + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(s) + keys: salary (type: float) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: float) + sort order: + + Map-reduce partition columns: _col0 (type: float) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: float) + mode: mergepartial + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 + 1.0) (type: float), (_col1 + 1L) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select salary + 1, sum(empid) + 1 as s +from emps where deptno > 15 group by salary +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select salary + 1, sum(empid) + 1 as s +from emps where deptno > 15 group by salary +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +8001.0 201 +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select depts.name +from emps +join depts on (emps.deptno = depts.deptno) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select depts.name +from emps +join depts on (emps.deptno = depts.deptno) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select dependents.empid +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (depts.name = dependents.name) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select dependents.empid +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (depts.name = dependents.name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + 
Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: dependents + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: varchar(256)) + 1 _col1 (type: varchar(256)) + outputColumnNames: _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select dependents.empid +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (depts.name = dependents.name) +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select dependents.empid +from emps +join depts on (emps.deptno = depts.deptno) +join dependents on (depts.name = dependents.name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select depts.name +from emps +join depts on (emps.deptno = depts.deptno) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select depts.name +from emps +join depts on (emps.deptno = depts.deptno) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: 
Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: locations + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: dependents + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: varchar(256)) + 1 _col0 (type: varchar(256)) + 2 _col1 (type: varchar(256)) + outputColumnNames: _col2 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@locations +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@locations +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select emps.empid, emps.deptno, emps.name as name1, emps.salary, emps.commission, dependents.name as name2 +from emps join dependents on (emps.empid = dependents.empid) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select emps.empid, emps.deptno, emps.name as name1, emps.salary, emps.commission, dependents.name as name2 +from emps join dependents on (emps.empid = dependents.empid) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select emps.empid, dependents.empid, emps.deptno +from emps +join dependents on (emps.empid = dependents.empid) +join depts a on (emps.deptno=a.deptno) +where emps.name = 'Bill' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select emps.empid, dependents.empid, emps.deptno +from emps +join dependents on (emps.empid = dependents.empid) +join depts a on (emps.deptno=a.deptno) +where emps.name = 'Bill' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + 
TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToString(name1) = 'Bill') (type: boolean) + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), deptno (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select emps.empid, dependents.empid, emps.deptno +from emps +join dependents on (emps.empid = dependents.empid) +join depts a on (emps.deptno=a.deptno) +where emps.name = 'Bill' +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select emps.empid, dependents.empid, emps.deptno +from emps +join dependents on (emps.empid = dependents.empid) +join depts a on (emps.deptno=a.deptno) +where emps.name = 'Bill' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 diff --git ql/src/test/results/clientpositive/materialized_view_rewrite_7.q.out ql/src/test/results/clientpositive/materialized_view_rewrite_7.q.out new file mode 100644 index 0000000..486a50d --- /dev/null +++ ql/src/test/results/clientpositive/materialized_view_rewrite_7.q.out @@ -0,0 +1,1036 @@ +PREHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps +POSTHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps +PREHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 
1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@emps +POSTHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@emps +POSTHOOK: Lineage: emps.commission SCRIPT [] +POSTHOOK: Lineage: emps.deptno SCRIPT [] +POSTHOOK: Lineage: emps.empid SCRIPT [] +POSTHOOK: Lineage: emps.name SCRIPT [] +POSTHOOK: Lineage: emps.salary SCRIPT [] +PREHOOK: query: analyze table emps compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Output: default@emps +#### A masked pattern was here #### +POSTHOOK: query: analyze table emps compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Output: default@emps +#### A masked pattern was here #### +PREHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@depts +POSTHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@depts +PREHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@depts +POSTHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@depts +POSTHOOK: Lineage: depts.deptno SCRIPT [] +POSTHOOK: Lineage: depts.locationid SCRIPT [] +POSTHOOK: Lineage: depts.name SCRIPT [] +PREHOOK: query: analyze table depts compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Output: default@depts +#### A masked pattern was here #### +POSTHOOK: query: analyze table depts compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Output: default@depts +#### A masked pattern was here #### +PREHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dependents +POSTHOOK: query: create table dependents ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dependents +PREHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dependents +POSTHOOK: query: insert into dependents values (10, 'Michael'), (10, 'Jane') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dependents +POSTHOOK: Lineage: dependents.empid SCRIPT [] +POSTHOOK: Lineage: dependents.name SCRIPT [] +PREHOOK: query: analyze table dependents compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: 
Output: default@dependents +#### A masked pattern was here #### +POSTHOOK: query: analyze table dependents compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Output: default@dependents +#### A masked pattern was here #### +PREHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@locations +POSTHOOK: query: create table locations ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@locations +PREHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@locations +POSTHOOK: query: insert into locations values (10, 'San Francisco'), (10, 'San Diego') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@locations +POSTHOOK: Lineage: locations.locationid SCRIPT [] +POSTHOOK: Lineage: locations.name SCRIPT [] +PREHOOK: query: analyze table locations compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@locations +PREHOOK: Output: default@locations +#### A masked pattern was here #### +POSTHOOK: query: analyze table locations compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@locations +POSTHOOK: Output: default@locations +#### A masked pattern was here #### +PREHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table dependents add constraint pk3 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table locations add constraint pk4 primary key (locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint fk2 foreign key (locationid) references locations(locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps change column deptno deptno int constraint nn1 not 
null disable novalidate rely +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@emps +PREHOOK: Output: default@emps +POSTHOOK: query: alter table emps change column deptno deptno int constraint nn1 not null disable novalidate rely +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@emps +POSTHOOK: Output: default@emps +PREHOOK: query: alter table depts change column locationid locationid int constraint nn2 not null disable novalidate rely +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@depts +PREHOOK: Output: default@depts +POSTHOOK: query: alter table depts change column locationid locationid int constraint nn2 not null disable novalidate rely +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@depts +POSTHOOK: Output: default@depts +PREHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 11 +group by depts.deptno, dependents.empid +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@locations +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 11 +group by depts.deptno, dependents.empid +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@locations +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select dependents.empid, depts.deptno +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 +group by dependents.empid, depts.deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select dependents.empid, depts.deptno +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 +group by dependents.empid, depts.deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-1 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((deptno <= 11) and (deptno > 10) and name is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), name 
(type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: varchar(256)) + TableScan + alias: emps + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((deptno <= 11) and (deptno > 10)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: dependents + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + TableScan + alias: locations + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col1 (type: varchar(256)) + 1 _col0 (type: varchar(256)) + 2 _col1 (type: varchar(256)) + outputColumnNames: _col0, _col3 + Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), deptno (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select dependents.empid, depts.deptno +from depts +join dependents on (depts.name = dependents.name) 
+join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 +group by dependents.empid, depts.deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@locations +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select dependents.empid, depts.deptno +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 +group by dependents.empid, depts.deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@locations +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, dependents.empid, count(emps.salary) as s +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 11 and depts.deptno < 19 +group by depts.deptno, dependents.empid +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@locations +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, dependents.empid, count(emps.salary) as s +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 11 and depts.deptno < 19 +group by depts.deptno, dependents.empid +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@locations +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select dependents.empid, count(emps.salary) + 1 +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 and depts.deptno < 20 +group by dependents.empid +PREHOOK: type: QUERY +POSTHOOK: query: explain +select dependents.empid, count(emps.salary) + 1 +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 and depts.deptno < 20 +group by dependents.empid +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-1 depends on stages: Stage-5 + Stage-2 depends on 
stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-7 + Stage-7 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: emps + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((deptno <= 11) or (deptno >= 19)) and (deptno < 20) and (deptno > 10)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), salary (type: float) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: float) + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((deptno <= 11) or (deptno >= 19)) and (deptno < 20) and (deptno > 10) and name is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: varchar(256)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: dependents + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + TableScan + alias: locations + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: name (type: varchar(256)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: 
varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col3 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col3 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: float) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col1 (type: varchar(256)) + 1 _col0 (type: varchar(256)) + 2 _col3 (type: varchar(256)) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col4) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), 
(_col1 + 1L) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), s (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select dependents.empid, count(emps.salary) + 1 +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 and depts.deptno < 20 +group by dependents.empid +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@locations +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select dependents.empid, count(emps.salary) + 1 +from depts +join dependents on (depts.name = dependents.name) +join locations on (locations.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 10 and depts.deptno < 20 +group by dependents.empid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Input: default@locations +POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +PREHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno >= 10 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@dependents +PREHOOK: Input: 
default@depts +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1 +POSTHOOK: query: create materialized view mv1 enable rewrite as +select depts.deptno, dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno >= 10 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1 +PREHOOK: query: analyze table mv1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table mv1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((deptno < 10) and (deptno > 0) and name is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: varchar(256)) + TableScan + alias: emps + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((deptno < 10) and (deptno > 0)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator 
+ key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: dependents + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: varchar(256)) + 1 _col1 (type: varchar(256)) + outputColumnNames: _col3 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: default.mv1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@dependents +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +PREHOOK: Input: default@mv1 +#### A masked pattern was here #### +POSTHOOK: query: select dependents.empid +from depts +join dependents on (depts.name = dependents.name) +join emps on (emps.deptno = depts.deptno) +where depts.deptno > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dependents +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps 
+POSTHOOK: Input: default@mv1 +#### A masked pattern was here #### +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1 +PREHOOK: Output: default@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1 +POSTHOOK: Output: default@mv1 diff --git ql/src/test/results/clientpositive/materialized_view_rewrite_8.q.out ql/src/test/results/clientpositive/materialized_view_rewrite_8.q.out new file mode 100644 index 0000000..1ca06d3 --- /dev/null +++ ql/src/test/results/clientpositive/materialized_view_rewrite_8.q.out @@ -0,0 +1,642 @@ +PREHOOK: query: create table if not exists source_table_001 ( +MY_DATE date, +MY_ID bigint, +MY_ID2 bigint, +ENVIRONMENT string, +DOWN_VOLUME bigint, +UP_VOLUME bigint +) +stored AS ORC +TBLPROPERTIES("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@source_table_001 +POSTHOOK: query: create table if not exists source_table_001 ( +MY_DATE date, +MY_ID bigint, +MY_ID2 bigint, +ENVIRONMENT string, +DOWN_VOLUME bigint, +UP_VOLUME bigint +) +stored AS ORC +TBLPROPERTIES("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@source_table_001 +PREHOOK: query: insert into table source_table_001 + values ('2010-10-10', 1, 1, 'env', 1, 1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@source_table_001 +POSTHOOK: query: insert into table source_table_001 + values ('2010-10-10', 1, 1, 'env', 1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@source_table_001 +POSTHOOK: Lineage: source_table_001.down_volume SCRIPT [] +POSTHOOK: Lineage: source_table_001.environment SCRIPT [] +POSTHOOK: Lineage: source_table_001.my_date SCRIPT [] +POSTHOOK: Lineage: source_table_001.my_id SCRIPT [] +POSTHOOK: Lineage: source_table_001.my_id2 SCRIPT [] +POSTHOOK: Lineage: source_table_001.up_volume SCRIPT [] +PREHOOK: query: analyze table source_table_001 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@source_table_001 +PREHOOK: Output: default@source_table_001 +#### A masked pattern was here #### +POSTHOOK: query: analyze table source_table_001 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@source_table_001 +POSTHOOK: Output: default@source_table_001 +#### A masked pattern was here #### +PREHOOK: query: CREATE MATERIALIZED VIEW source_table_001_mv ENABLE REWRITE AS +SELECT +SUM(A.DOWN_VOLUME) AS DOWN_VOLUME_SUM, +SUM(A.UP_VOLUME) AS UP_VOLUME_SUM, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +from source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@source_table_001 +PREHOOK: Output: database:default +PREHOOK: Output: default@source_table_001_mv +POSTHOOK: query: CREATE MATERIALIZED VIEW source_table_001_mv ENABLE REWRITE AS +SELECT +SUM(A.DOWN_VOLUME) AS DOWN_VOLUME_SUM, +SUM(A.UP_VOLUME) AS UP_VOLUME_SUM, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +from source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@source_table_001 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@source_table_001_mv +PREHOOK: query: analyze table source_table_001_mv compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: 
default@source_table_001_mv +PREHOOK: Output: default@source_table_001_mv +#### A masked pattern was here #### +POSTHOOK: query: analyze table source_table_001_mv compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@source_table_001_mv +POSTHOOK: Output: default@source_table_001_mv +#### A masked pattern was here #### +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: down_volume_sum (type: bigint), my_date (type: date), my_id2 (type: bigint), environment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +LIMIT 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +LIMIT 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: down_volume_sum (type: bigint), my_date (type: date), my_id2 (type: bigint), environment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain +select +1, +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +1, +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), down_volume_sum (type: bigint), my_date (type: date), my_id2 (type: bigint), environment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 163 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) + 0 AS DOWNLOAD_VOLUME_BYTES, 
+A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) + 0 AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (down_volume_sum + 0L) (type: bigint), my_date (type: date), my_id2 (type: bigint), environment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +ORDER BY A.MY_ID2 +LIMIT 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +ORDER BY A.MY_ID2 +LIMIT 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: down_volume_sum (type: bigint), my_date (type: date), my_id2 (type: bigint), environment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: bigint), _col1 (type: date), _col3 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: date), KEY.reducesinkkey0 (type: bigint), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: explain +select +distinct 
A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +distinct A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: my_date (type: date), my_id2 (type: bigint), environment (type: string) + outputColumnNames: my_date, my_id2, environment + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: my_date (type: date), my_id2 (type: bigint), environment (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: bigint), _col2 (type: string) + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: date), KEY._col1 (type: bigint), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (my_date = DATE'2010-01-10') (type: boolean) + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: down_volume_sum (type: bigint), DATE'2010-01-10' (type: date), my_id2 (type: bigint), environment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + 
Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) + SUM(A.UP_VOLUME) AS TOTAL_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) + SUM(A.UP_VOLUME) AS TOTAL_VOLUME_BYTES, +A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 167 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (my_date = DATE'2010-01-10') (type: boolean) + Statistics: Num rows: 1 Data size: 167 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (down_volume_sum + up_volume_sum) (type: bigint), DATE'2010-01-10' (type: date), my_id2 (type: bigint), environment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 159 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (my_date = DATE'2010-01-10') (type: boolean) + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: down_volume_sum (type: bigint) + outputColumnNames: down_volume_sum + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(down_volume_sum) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +TO_DATE('2010-01-10') +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +TO_DATE('2010-01-10') +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (my_date = DATE'2010-01-10') (type: boolean) + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: down_volume_sum (type: bigint) + outputColumnNames: down_volume_sum + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(down_volume_sum) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint), DATE'2010-01-10' (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +group by A.MY_DATE +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +A.MY_DATE +FROM source_table_001 AS A +where A.MY_DATE=TO_DATE('2010-01-10') +group by A.MY_DATE +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (my_date = DATE'2010-01-10') (type: boolean) + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: down_volume_sum (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + keys: DATE'2010-01-10' (type: date) + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: date) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint), DATE'2010-01-10' (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop materialized view source_table_001_mv +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@source_table_001_mv +PREHOOK: Output: default@source_table_001_mv +POSTHOOK: query: drop materialized view source_table_001_mv +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@source_table_001_mv +POSTHOOK: Output: default@source_table_001_mv diff --git ql/src/test/results/clientpositive/materialized_view_rewrite_9.q.out ql/src/test/results/clientpositive/materialized_view_rewrite_9.q.out new file mode 100644 index 0000000..3120e0d --- /dev/null +++ ql/src/test/results/clientpositive/materialized_view_rewrite_9.q.out @@ -0,0 +1,361 @@ +PREHOOK: query: create table if not exists source_table_001 ( +MY_DATE timestamp, +MY_ID bigint, +MY_ID2 bigint, +ENVIRONMENT string, +DOWN_VOLUME bigint, +UP_VOLUME bigint +) +stored AS ORC +TBLPROPERTIES("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@source_table_001 +POSTHOOK: query: create table if not exists source_table_001 ( +MY_DATE timestamp, +MY_ID bigint, +MY_ID2 bigint, +ENVIRONMENT string, +DOWN_VOLUME bigint, +UP_VOLUME bigint +) +stored AS ORC +TBLPROPERTIES("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@source_table_001 +PREHOOK: query: insert into table source_table_001 + values ('2010-10-10 00:00:00', 1, 1, 'env', 1, 1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@source_table_001 +POSTHOOK: query: insert into table source_table_001 + values ('2010-10-10 00:00:00', 1, 1, 'env', 1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@source_table_001 +POSTHOOK: Lineage: source_table_001.down_volume SCRIPT [] +POSTHOOK: Lineage: source_table_001.environment SCRIPT [] +POSTHOOK: Lineage: source_table_001.my_date SCRIPT [] +POSTHOOK: Lineage: source_table_001.my_id SCRIPT [] +POSTHOOK: Lineage: source_table_001.my_id2 SCRIPT [] +POSTHOOK: Lineage: source_table_001.up_volume SCRIPT [] +PREHOOK: query: analyze table source_table_001 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@source_table_001 +PREHOOK: Output: default@source_table_001 +#### A masked pattern was 
here #### +POSTHOOK: query: analyze table source_table_001 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@source_table_001 +POSTHOOK: Output: default@source_table_001 +#### A masked pattern was here #### +PREHOOK: query: CREATE MATERIALIZED VIEW source_table_001_mv ENABLE REWRITE AS +SELECT +SUM(A.DOWN_VOLUME) AS DOWN_VOLUME_SUM, +SUM(A.UP_VOLUME) AS UP_VOLUME_SUM, +A.MY_ID,A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +from source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@source_table_001 +PREHOOK: Output: database:default +PREHOOK: Output: default@source_table_001_mv +POSTHOOK: query: CREATE MATERIALIZED VIEW source_table_001_mv ENABLE REWRITE AS +SELECT +SUM(A.DOWN_VOLUME) AS DOWN_VOLUME_SUM, +SUM(A.UP_VOLUME) AS UP_VOLUME_SUM, +A.MY_ID,A.MY_DATE,A.MY_ID2,A.ENVIRONMENT +from source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,A.MY_DATE +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@source_table_001 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@source_table_001_mv +PREHOOK: query: analyze table source_table_001_mv compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@source_table_001_mv +PREHOOK: Output: default@source_table_001_mv +#### A masked pattern was here #### +POSTHOOK: query: analyze table source_table_001_mv compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@source_table_001_mv +POSTHOOK: Output: default@source_table_001_mv +#### A masked pattern was here #### +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +FLOOR(A.MY_DATE to hour),A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to hour) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +FLOOR(A.MY_DATE to hour),A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to hour) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: down_volume_sum (type: bigint), my_id (type: bigint), my_id2 (type: bigint), environment (type: string), floor_hour(my_date) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + keys: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: string), _col3 (type: timestamp) + sort order: ++++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: string), _col3 (type: timestamp) + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col4 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint), KEY._col2 
(type: string), KEY._col3 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col4 (type: bigint), _col3 (type: timestamp), _col1 (type: bigint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 143 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 143 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP MATERIALIZED VIEW source_table_001_mv +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@source_table_001_mv +PREHOOK: Output: default@source_table_001_mv +POSTHOOK: query: DROP MATERIALIZED VIEW source_table_001_mv +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@source_table_001_mv +POSTHOOK: Output: default@source_table_001_mv +PREHOOK: query: CREATE MATERIALIZED VIEW source_table_001_mv ENABLE REWRITE AS +SELECT +SUM(A.DOWN_VOLUME) AS DOWN_VOLUME_SUM, +SUM(A.UP_VOLUME) AS UP_VOLUME_SUM, +A.MY_ID,FLOOR(A.MY_DATE to hour),A.MY_ID2,A.ENVIRONMENT +from source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to hour) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@source_table_001 +PREHOOK: Output: database:default +PREHOOK: Output: default@source_table_001_mv +POSTHOOK: query: CREATE MATERIALIZED VIEW source_table_001_mv ENABLE REWRITE AS +SELECT +SUM(A.DOWN_VOLUME) AS DOWN_VOLUME_SUM, +SUM(A.UP_VOLUME) AS UP_VOLUME_SUM, +A.MY_ID,FLOOR(A.MY_DATE to hour),A.MY_ID2,A.ENVIRONMENT +from source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to hour) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@source_table_001 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@source_table_001_mv +PREHOOK: query: analyze table source_table_001_mv compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@source_table_001_mv +PREHOOK: Output: default@source_table_001_mv +#### A masked pattern was here #### +POSTHOOK: query: analyze table source_table_001_mv compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@source_table_001_mv +POSTHOOK: Output: default@source_table_001_mv +#### A masked pattern was here #### +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +FLOOR(A.MY_DATE to day),A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to day) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +FLOOR(A.MY_DATE to day),A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to day) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: down_volume_sum (type: bigint), my_id (type: bigint), 
my_id2 (type: bigint), environment (type: string), floor_day(_c3) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + keys: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: string), _col3 (type: timestamp) + sort order: ++++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: string), _col3 (type: timestamp) + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col4 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col4 (type: bigint), _col3 (type: timestamp), _col1 (type: bigint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 143 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 143 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +FLOOR(A.MY_DATE to hour),A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to hour) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +FLOOR(A.MY_DATE to hour),A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to hour) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.source_table_001_mv + Statistics: Num rows: 1 Data size: 143 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: down_volume_sum (type: bigint), _c3 (type: timestamp), my_id2 (type: bigint), environment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 143 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +FLOOR(A.MY_DATE to second),A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to second) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select +SUM(A.DOWN_VOLUME) AS DOWNLOAD_VOLUME_BYTES, +FLOOR(A.MY_DATE to second),A.MY_ID2,A.ENVIRONMENT +FROM source_table_001 AS A +group by A.MY_ID,A.MY_ID2,A.ENVIRONMENT,FLOOR(A.MY_DATE to second) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends 
on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: my_id (type: bigint), my_id2 (type: bigint), environment (type: string), floor_second(my_date) (type: timestamp), down_volume (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col4) + keys: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: string), _col3 (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: string), _col3 (type: timestamp) + sort order: ++++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: string), _col3 (type: timestamp) + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col4 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col4 (type: bigint), _col3 (type: timestamp), _col1 (type: bigint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 143 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 143 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP MATERIALIZED VIEW source_table_001_mv +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@source_table_001_mv +PREHOOK: Output: default@source_table_001_mv +POSTHOOK: query: DROP MATERIALIZED VIEW source_table_001_mv +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@source_table_001_mv +POSTHOOK: Output: default@source_table_001_mv diff --git ql/src/test/results/clientpositive/outer_reference_windowed.q.out ql/src/test/results/clientpositive/outer_reference_windowed.q.out index aef80c5..3d918b7 100644 --- ql/src/test/results/clientpositive/outer_reference_windowed.q.out +++ ql/src/test/results/clientpositive/outer_reference_windowed.q.out @@ -578,36 +578,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: e011_03 - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE + alias: e011_01 + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE + expressions: c1 (type: decimal(15,2)) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(15,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(15,2)) + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE TableScan - alias: e011_01 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + alias: e011_03 + Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c1 is not null (type: boolean) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c1 (type: decimal(15,2)) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(15,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(15,2)) Reduce Operator Tree: Join Operator condition map: @@ -616,13 +616,13 @@ STAGE PLANS: 0 _col0 (type: decimal(15,2)) 1 _col0 (type: decimal(15,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + aggregations: sum(_col0) + keys: _col1 (type: decimal(15,2)), _col2 (type: decimal(15,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -638,7 +638,7 @@ STAGE PLANS: key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) sort order: ++ Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(25,2)) Reduce Operator Tree: Group By Operator @@ -646,7 +646,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -662,13 +662,13 @@ STAGE PLANS: key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) sort order: ++ Map-reduce partition columns: _col1 (type: decimal(15,2)) - Statistics: Num rows: 2 Data size: 
19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(25,2)) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -688,14 +688,14 @@ STAGE PLANS: name: sum window function: GenericUDAFSumHiveDecimal window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sum_window_0 (type: decimal(35,2)) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -821,41 +821,37 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col0 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumDouble - window frame: RANGE PRECEDING(MAX)~CURRENT + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: double) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/perf/spark/query12.q.out ql/src/test/results/clientpositive/perf/spark/query12.q.out index 411ad88..af21058 100644 --- ql/src/test/results/clientpositive/perf/spark/query12.q.out +++ ql/src/test/results/clientpositive/perf/spark/query12.q.out @@ -178,20 +178,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) Reducer 4 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2)) + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE PTF Operator @@ -203,8 +199,8 @@ STAGE PLANS: Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST - partition by: _col3 + order by: _col1 ASC NULLS FIRST + partition by: _col1 raw input shape: window functions: window function definition @@ -215,7 +211,7 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,17)), _col0 (type: string) + expressions: _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,17)), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git 
ql/src/test/results/clientpositive/perf/spark/query20.q.out ql/src/test/results/clientpositive/perf/spark/query20.q.out index 8aa57e1..9097b35 100644 --- ql/src/test/results/clientpositive/perf/spark/query20.q.out +++ ql/src/test/results/clientpositive/perf/spark/query20.q.out @@ -170,20 +170,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) Reducer 4 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2)) + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE PTF Operator @@ -195,8 +191,8 @@ STAGE PLANS: Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST - partition by: _col3 + order by: _col1 ASC NULLS FIRST + partition by: _col1 raw input shape: window functions: window function definition @@ -207,7 +203,7 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,17)), _col0 (type: string) + expressions: _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,17)), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git ql/src/test/results/clientpositive/perf/spark/query47.q.out ql/src/test/results/clientpositive/perf/spark/query47.q.out index a61afcd..5175f80 100644 --- ql/src/test/results/clientpositive/perf/spark/query47.q.out +++ ql/src/test/results/clientpositive/perf/spark/query47.q.out @@ -415,33 +415,29 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int), _col6 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Reduce Output Operator + key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int) + sort order: +++++ + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: int), _col6 (type: decimal(17,2)) + value expressions: _col1 (type: int), _col6 (type: decimal(17,2)) Reducer 15 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: decimal(17,2)) + expressions: KEY.reducesinkkey4 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) + output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST - partition by: _col0, _col1, _col2, _col3, _col4 + order by: _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col0 ASC NULLS FIRST + partition by: _col3, _col2, _col4, _col5, _col0 raw input shape: window functions: window function definition @@ -452,54 +448,54 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6 
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) + key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int) sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col6 (type: decimal(17,2)) Reducer 16 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col1 (type: decimal(17,2)) + expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: int, _col7: decimal(17,2) + output shape: _col0: decimal(21,6), _col1: int, _col2: int, _col3: string, _col4: string, _col5: string, _col6: string, _col7: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col5 ASC NULLS FIRST, _col6 ASC NULLS FIRST - partition by: _col1, _col2, _col3, _col4 + order by: _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST + partition by: _col4, _col3, _col5, _col6 raw input shape: window functions: window function definition alias: rank_window_1 - arguments: _col5, _col6 + arguments: _col1, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > 0) and (_col5 = 2000) and rank_window_1 is not null) (type: boolean) + predicate: ((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) (type: boolean) Statistics: Num rows: 63887519 Data size: 5636175475 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: decimal(17,2)) + expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: decimal(17,2)) outputColumnNames: rank_window_1, _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 63887519 Data size: 5636175475 Basic stats: 
COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((_col0 > 0)) THEN (((abs((_col7 - _col0)) / _col0) > 0.1)) ELSE (null) END (type: boolean) Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) + expressions: _col4 (type: string), _col3 (type: string), _col5 (type: string), _col6 (type: string), _col1 (type: int), _col2 (type: int), _col7 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -582,38 +578,34 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int), _col6 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Reduce Output Operator + key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int) + sort order: ++++++ + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) - sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: decimal(17,2)) + value expressions: _col6 (type: decimal(17,2)) Reducer 24 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) + output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST - partition by: _col0, _col1, _col2, _col3 + order by: _col0 ASC NULLS FIRST, 
_col1 ASC NULLS FIRST + partition by: _col3, _col2, _col4, _col5 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col4, _col5 + arguments: _col0, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -623,7 +615,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -674,38 +666,34 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int), _col6 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Reduce Output Operator + key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int) + sort order: ++++++ + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) - sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: decimal(17,2)) + value expressions: _col6 (type: decimal(17,2)) Reducer 5 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) + output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col4 ASC NULLS FIRST, _col5 ASC NULLS 
FIRST - partition by: _col0, _col1, _col2, _col3 + order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col3, _col2, _col4, _col5 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col4, _col5 + arguments: _col0, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -715,7 +703,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git ql/src/test/results/clientpositive/perf/spark/query57.q.out ql/src/test/results/clientpositive/perf/spark/query57.q.out index b6d58c1..1b66f54 100644 --- ql/src/test/results/clientpositive/perf/spark/query57.q.out +++ ql/src/test/results/clientpositive/perf/spark/query57.q.out @@ -409,33 +409,29 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int), _col5 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int) + sort order: ++++ + Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int), _col5 (type: decimal(17,2)) + value expressions: _col1 (type: int), _col5 (type: decimal(17,2)) Reducer 15 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: decimal(17,2)) + expressions: KEY.reducesinkkey3 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2) + output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, 
_col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST - partition by: _col0, _col1, _col2, _col3 + order by: _col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST + partition by: _col4, _col3, _col2, _col0 raw input shape: window functions: window function definition @@ -446,54 +442,54 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int) sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col5 (type: decimal(17,2)) Reducer 16 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col1 (type: decimal(17,2)) + expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) + output shape: _col0: decimal(21,6), _col1: int, _col2: int, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST - partition by: _col1, _col2, _col3 + order by: _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST + partition by: _col5, _col4, _col3 raw input shape: window functions: window function definition alias: rank_window_1 - arguments: _col4, _col5 + arguments: _col1, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Filter Operator - 
predicate: ((_col0 > 0) and (_col4 = 2000) and rank_window_1 is not null) (type: boolean) + predicate: ((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) (type: boolean) Statistics: Num rows: 31942874 Data size: 4325706828 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)) + expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) outputColumnNames: rank_window_1, _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 31942874 Data size: 4325706828 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (null) END (type: boolean) Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) + expressions: _col5 (type: string), _col4 (type: string), _col3 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -588,38 +584,34 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int), _col5 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int) + sort order: +++++ + Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(17,2)) + value expressions: _col5 (type: decimal(17,2)) Reducer 24 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 
25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2) + output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST - partition by: _col0, _col1, _col2 + order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col4, _col3, _col2 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col3, _col4 + arguments: _col0, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -629,7 +621,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -668,38 +660,34 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int), _col5 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int) + sort order: +++++ + Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(17,2)) + value expressions: _col5 (type: decimal(17,2)) Reducer 5 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: 
decimal(17,2) + output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST - partition by: _col0, _col1, _col2 + order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col4, _col3, _col2 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col3, _col4 + arguments: _col0, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -709,7 +697,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git ql/src/test/results/clientpositive/perf/spark/query89.q.out ql/src/test/results/clientpositive/perf/spark/query89.q.out index baec217..c933fe1 100644 --- ql/src/test/results/clientpositive/perf/spark/query89.q.out +++ ql/src/test/results/clientpositive/perf/spark/query89.q.out @@ -203,33 +203,29 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col3 (type: int), _col6 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Reduce Output Operator + key expressions: _col2 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: int), _col6 (type: decimal(17,2)) + value expressions: _col1 (type: string), _col3 (type: int), _col6 (type: decimal(17,2)) Reducer 5 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(17,2)) + expressions: KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST - partition by: _col0, _col2, _col3, _col4 + order by: _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST + partition by: _col2, _col0, _col4, _col5 raw input shape: window functions: window function definition @@ -240,14 +236,14 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((avg_window_0 <> 0)) THEN (((abs((_col6 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean) Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6)) + expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col3 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git ql/src/test/results/clientpositive/perf/spark/query98.q.out ql/src/test/results/clientpositive/perf/spark/query98.q.out index 5264bc0..9513434 100644 --- ql/src/test/results/clientpositive/perf/spark/query98.q.out +++ ql/src/test/results/clientpositive/perf/spark/query98.q.out @@ -176,20 +176,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 348477374 Data size: 30742775095 Basic 
stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) Reducer 4 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2)) + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE PTF Operator @@ -201,8 +197,8 @@ STAGE PLANS: Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST - partition by: _col3 + order by: _col1 ASC NULLS FIRST + partition by: _col1 raw input shape: window functions: window function definition @@ -213,7 +209,7 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,17)), _col0 (type: string) + expressions: _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,17)), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git ql/src/test/results/clientpositive/perf/tez/query12.q.out ql/src/test/results/clientpositive/perf/tez/query12.q.out index fe18e03..b1f5926 100644 --- ql/src/test/results/clientpositive/perf/tez/query12.q.out +++ ql/src/test/results/clientpositive/perf/tez/query12.q.out @@ -76,63 +76,61 @@ Stage-0 limit:-1 Stage-1 Reducer 6 - File Output Operator [FS_29] - Limit [LIM_27] (rows=100 width=135) + File Output Operator [FS_28] + Limit [LIM_26] (rows=100 width=135) Number of rows:100 - Select Operator [SEL_26] (rows=87121617 width=135) + Select Operator [SEL_25] (rows=87121617 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_25] - Select Operator [SEL_23] (rows=87121617 width=135) + SHUFFLE [RS_24] + Select Operator [SEL_22] (rows=87121617 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_22] (rows=87121617 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST","partition by:":"_col3"}] - Select Operator [SEL_21] (rows=87121617 width=135) + PTF Operator [PTF_21] (rows=87121617 width=135) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] + 
Select Operator [SEL_20] (rows=87121617 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_20] - PartitionCols:_col3 - Select Operator [SEL_19] (rows=87121617 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_18] (rows=87121617 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_16] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col10, _col9, _col6, _col7, _col8 - Merge Join Operator [MERGEJOIN_39] (rows=174243235 width=135) - Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=231000 width=1436) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_37] (rows=231000 width=1436) - predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_38] (rows=158402938 width=135) - Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_9] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=144002668 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_35] (rows=144002668 width=135) - predicate:(ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_36] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00.0' AND TIMESTAMP'2001-02-11 00:00:00.0' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + SHUFFLE [RS_19] + PartitionCols:_col1 + Group By Operator [GBY_18] (rows=87121617 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_16] (rows=174243235 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col10, _col9, _col6, _col7, _col8 + Merge Join Operator [MERGEJOIN_38] (rows=174243235 width=135) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=231000 width=1436) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_36] (rows=231000 width=1436) + predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) + TableScan [TS_6] (rows=462000 width=1436) + 
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_37] (rows=158402938 width=135) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=144002668 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_34] (rows=144002668 width=135) + predicate:(ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_35] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00.0' AND TIMESTAMP'2001-02-11 00:00:00.0' and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] diff --git ql/src/test/results/clientpositive/perf/tez/query19.q.out ql/src/test/results/clientpositive/perf/tez/query19.q.out index 73bb6d9..71bd1b3 100644 --- ql/src/test/results/clientpositive/perf/tez/query19.q.out +++ ql/src/test/results/clientpositive/perf/tez/query19.q.out @@ -62,13 +62,13 @@ Stage-0 limit:-1 Stage-1 Reducer 6 - File Output Operator [FS_45] - Limit [LIM_43] (rows=100 width=88) + File Output Operator [FS_44] + Limit [LIM_42] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_42] (rows=421657640 width=88) + Select Operator [SEL_41] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_41] + SHUFFLE [RS_40] Select Operator [SEL_39] (rows=421657640 width=88) Output:["_col2","_col3","_col4","_col5","_col6"] Group By Operator [GBY_38] (rows=421657640 width=88) @@ -82,47 +82,47 @@ Stage-0 Output:["_col8","_col13","_col14","_col15","_col16"] Filter Operator [FIL_34] (rows=843315281 width=88) predicate:(substr(_col3, 1, 5) <> substr(_col19, 1, 5)) - Merge Join Operator [MERGEJOIN_74] (rows=843315281 width=88) + Merge Join Operator [MERGEJOIN_73] (rows=843315281 width=88) Conds:RS_31._col7=RS_32._col0(Inner),Output:["_col3","_col8","_col13","_col14","_col15","_col16","_col19"] <-Map 13 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col0 Select Operator [SEL_24] (rows=1704 width=1910) Output:["_col0","_col1"] - Filter Operator [FIL_69] (rows=1704 width=1910) + Filter Operator [FIL_68] (rows=1704 width=1910) predicate:s_store_sk is not null TableScan [TS_22] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_zip"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_73] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_72] (rows=766650239 width=88) Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col3","_col7","_col8","_col13","_col14","_col15","_col16"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_72] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_71] (rows=696954748 width=88) Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col2","_col3","_col4","_col9","_col10","_col11","_col12"] <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_14] (rows=231000 
width=1436) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_68] (rows=231000 width=1436) + Filter Operator [FIL_67] (rows=231000 width=1436) predicate:((i_manager_id = 7) and i_item_sk is not null) TableScan [TS_12] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manufact_id","i_manufact","i_manager_id"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_71] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_70] (rows=633595212 width=88) Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 Select Operator [SEL_11] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_67] (rows=18262 width=1119) + Filter Operator [FIL_66] (rows=18262 width=1119) predicate:((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] @@ -131,21 +131,21 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_8] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_66] (rows=575995635 width=88) + Filter Operator [FIL_65] (rows=575995635 width=88) predicate:(ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_6] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ext_sales_price"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_70] (rows=88000001 width=860) + Merge Join Operator [MERGEJOIN_69] (rows=88000001 width=860) Conds:RS_25._col1=RS_26._col0(Inner),Output:["_col0","_col3"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col1 Select Operator [SEL_2] (rows=80000000 width=860) Output:["_col0","_col1"] - Filter Operator [FIL_64] (rows=80000000 width=860) + Filter Operator [FIL_63] (rows=80000000 width=860) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) TableScan [TS_0] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] @@ -154,7 +154,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_5] (rows=40000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_65] (rows=40000000 width=1014) + Filter Operator [FIL_64] (rows=40000000 width=1014) predicate:ca_address_sk is not null TableScan [TS_3] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_zip"] diff --git ql/src/test/results/clientpositive/perf/tez/query20.q.out ql/src/test/results/clientpositive/perf/tez/query20.q.out index 7aee675..e9325b3 100644 --- ql/src/test/results/clientpositive/perf/tez/query20.q.out +++ ql/src/test/results/clientpositive/perf/tez/query20.q.out @@ -68,63 +68,61 @@ Stage-0 limit:-1 Stage-1 Reducer 6 - File Output Operator [FS_29] - Limit [LIM_27] (rows=100 width=135) + File Output Operator [FS_28] + Limit [LIM_26] (rows=100 width=135) Number of rows:100 - Select Operator [SEL_26] (rows=174233858 width=135) + Select Operator [SEL_25] (rows=174233858 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_25] - Select Operator [SEL_23] (rows=174233858 width=135) + SHUFFLE [RS_24] + Select Operator 
[SEL_22] (rows=174233858 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_22] (rows=174233858 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST","partition by:":"_col3"}] - Select Operator [SEL_21] (rows=174233858 width=135) + PTF Operator [PTF_21] (rows=174233858 width=135) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] + Select Operator [SEL_20] (rows=174233858 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_20] - PartitionCols:_col3 - Select Operator [SEL_19] (rows=174233858 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_18] (rows=174233858 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_16] (rows=348467716 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col10, _col9, _col6, _col7, _col8 - Merge Join Operator [MERGEJOIN_39] (rows=348467716 width=135) - Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=231000 width=1436) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_37] (rows=231000 width=1436) - predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_38] (rows=316788826 width=135) - Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_9] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_35] (rows=287989836 width=135) - predicate:(cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_36] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00.0' AND TIMESTAMP'2001-02-11 00:00:00.0' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + SHUFFLE [RS_19] + PartitionCols:_col1 + Group By Operator [GBY_18] (rows=174233858 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_16] (rows=348467716 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col10, _col9, _col6, _col7, _col8 + Merge Join 
Operator [MERGEJOIN_38] (rows=348467716 width=135) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=231000 width=1436) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_36] (rows=231000 width=1436) + predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) + TableScan [TS_6] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_37] (rows=316788826 width=135) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=287989836 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_34] (rows=287989836 width=135) + predicate:(cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_35] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00.0' AND TIMESTAMP'2001-02-11 00:00:00.0' and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] diff --git ql/src/test/results/clientpositive/perf/tez/query22.q.out ql/src/test/results/clientpositive/perf/tez/query22.q.out index 9a04651..19e0355 100644 --- ql/src/test/results/clientpositive/perf/tez/query22.q.out +++ ql/src/test/results/clientpositive/perf/tez/query22.q.out @@ -54,13 +54,13 @@ Stage-0 limit:100 Stage-1 Reducer 6 - File Output Operator [FS_30] - Limit [LIM_29] (rows=100 width=15) + File Output Operator [FS_29] + Limit [LIM_28] (rows=100 width=15) Number of rows:100 - Select Operator [SEL_28] (rows=125060762 width=15) + Select Operator [SEL_27] (rows=125060762 width=15) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_27] + SHUFFLE [RS_26] Select Operator [SEL_25] (rows=125060762 width=15) Output:["_col0","_col1","_col2","_col3","_col4"] Group By Operator [GBY_24] (rows=125060762 width=15) @@ -70,42 +70,42 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_22] (rows=250121525 width=15) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)"],keys:_col8, _col9, _col10, _col11, 0L - Merge Join Operator [MERGEJOIN_46] (rows=50024305 width=15) + Merge Join Operator [MERGEJOIN_45] (rows=50024305 width=15) Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col3","_col8","_col9","_col10","_col11"] <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_11] (rows=462000 width=1436) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_43] (rows=462000 width=1436) + Filter Operator [FIL_42] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_9] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE 
[RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_45] (rows=45476640 width=15) + Merge Join Operator [MERGEJOIN_44] (rows=45476640 width=15) Conds:RS_15._col2=RS_16._col0(Inner),Output:["_col1","_col3"] <-Map 8 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 Select Operator [SEL_8] (rows=27 width=1029) Output:["_col0"] - Filter Operator [FIL_42] (rows=27 width=1029) + Filter Operator [FIL_41] (rows=27 width=1029) predicate:w_warehouse_sk is not null TableScan [TS_6] (rows=27 width=1029) default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_44] (rows=41342400 width=15) + Merge Join Operator [MERGEJOIN_43] (rows=41342400 width=15) Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col0 Select Operator [SEL_2] (rows=37584000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_40] (rows=37584000 width=15) + Filter Operator [FIL_39] (rows=37584000 width=15) predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) TableScan [TS_0] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] @@ -114,7 +114,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_41] (rows=8116 width=1119) + Filter Operator [FIL_40] (rows=8116 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] diff --git ql/src/test/results/clientpositive/perf/tez/query3.q.out ql/src/test/results/clientpositive/perf/tez/query3.q.out index 462f05d..7d2b7f2 100644 --- ql/src/test/results/clientpositive/perf/tez/query3.q.out +++ ql/src/test/results/clientpositive/perf/tez/query3.q.out @@ -51,13 +51,13 @@ Stage-0 limit:100 Stage-1 Reducer 5 - File Output Operator [FS_24] - Limit [LIM_23] (rows=100 width=88) + File Output Operator [FS_23] + Limit [LIM_22] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_22] (rows=348477374 width=88) + Select Operator [SEL_21] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_21] + SHUFFLE [RS_20] Group By Operator [GBY_18] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] @@ -65,28 +65,28 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_16] (rows=696954748 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col8, _col4, _col5 - Merge Join Operator [MERGEJOIN_34] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_33] (rows=696954748 width=88) Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col2","_col4","_col5","_col8"] <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col0 Select Operator [SEL_8] (rows=36524 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_32] (rows=36524 width=1119) + Filter Operator [FIL_31] (rows=36524 width=1119) predicate:((d_moy = 12) and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=1119) default@date_dim,dt,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col0 - Merge Join Operator 
[MERGEJOIN_33] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_32] (rows=633595212 width=88) Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0","_col2","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_9] PartitionCols:_col1 Select Operator [SEL_2] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_30] (rows=575995635 width=88) + Filter Operator [FIL_29] (rows=575995635 width=88) predicate:(ss_item_sk is not null and ss_sold_date_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] @@ -95,7 +95,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_5] (rows=231000 width=1436) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_31] (rows=231000 width=1436) + Filter Operator [FIL_30] (rows=231000 width=1436) predicate:((i_manufact_id = 436) and i_item_sk is not null) TableScan [TS_3] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manufact_id"] diff --git ql/src/test/results/clientpositive/perf/tez/query46.q.out ql/src/test/results/clientpositive/perf/tez/query46.q.out index 1a193ed..3659824 100644 --- ql/src/test/results/clientpositive/perf/tez/query46.q.out +++ ql/src/test/results/clientpositive/perf/tez/query46.q.out @@ -83,44 +83,44 @@ Stage-0 limit:100 Stage-1 Reducer 4 - File Output Operator [FS_50] - Limit [LIM_49] (rows=100 width=88) + File Output Operator [FS_49] + Limit [LIM_48] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_48] (rows=463823414 width=88) + Select Operator [SEL_47] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_47] - Select Operator [SEL_46] (rows=463823414 width=88) + SHUFFLE [RS_46] + Select Operator [SEL_45] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_45] (rows=463823414 width=88) + Filter Operator [FIL_44] (rows=463823414 width=88) predicate:(_col5 <> _col8) - Merge Join Operator [MERGEJOIN_86] (rows=463823414 width=88) - Conds:RS_42._col0=RS_43._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_85] (rows=463823414 width=88) + Conds:RS_41._col0=RS_42._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_42] + SHUFFLE [RS_41] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_81] (rows=88000001 width=860) - Conds:RS_39._col1=RS_40._col0(Inner),Output:["_col0","_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_80] (rows=88000001 width=860) + Conds:RS_38._col1=RS_39._col0(Inner),Output:["_col0","_col2","_col3","_col5"] <-Map 5 [SIMPLE_EDGE] - SHUFFLE [RS_40] + SHUFFLE [RS_39] PartitionCols:_col0 Select Operator [SEL_5] (rows=40000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_75] (rows=40000000 width=1014) + Filter Operator [FIL_74] (rows=40000000 width=1014) predicate:ca_address_sk is not null TableScan [TS_3] (rows=40000000 width=1014) default@customer_address,current_addr,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_city"] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_39] + SHUFFLE [RS_38] PartitionCols:_col1 Select Operator [SEL_2] (rows=80000000 width=860) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_74] (rows=80000000 width=860) + Filter Operator [FIL_73] (rows=80000000 width=860) 
predicate:(c_current_addr_sk is not null and c_customer_sk is not null) TableScan [TS_0] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name"] <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_43] + SHUFFLE [RS_42] PartitionCols:_col1 Select Operator [SEL_37] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] @@ -131,7 +131,7 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_34] (rows=843315281 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","sum(_col7)"],keys:_col1, _col17, _col3, _col5 - Merge Join Operator [MERGEJOIN_85] (rows=843315281 width=88) + Merge Join Operator [MERGEJOIN_84] (rows=843315281 width=88) Conds:RS_30._col3=RS_31._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col17"] <-Map 5 [SIMPLE_EDGE] SHUFFLE [RS_31] @@ -140,42 +140,42 @@ Stage-0 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_84] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_83] (rows=766650239 width=88) Conds:RS_27._col2=RS_28._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7"] <-Map 14 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 Select Operator [SEL_17] (rows=7200 width=107) Output:["_col0"] - Filter Operator [FIL_79] (rows=7200 width=107) + Filter Operator [FIL_78] (rows=7200 width=107) predicate:(((hd_dep_count = 2) or (hd_vehicle_count = 1)) and hd_demo_sk is not null) TableScan [TS_15] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_83] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_82] (rows=696954748 width=88) Conds:RS_24._col4=RS_25._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] <-Map 13 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0 Select Operator [SEL_14] (rows=852 width=1910) Output:["_col0"] - Filter Operator [FIL_78] (rows=852 width=1910) + Filter Operator [FIL_77] (rows=852 width=1910) predicate:((s_city) IN ('Cedar Grove', 'Wildwood', 'Union', 'Salem', 'Highland Park') and s_store_sk is not null) TableScan [TS_12] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_city"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_82] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_81] (rows=633595212 width=88) Conds:RS_21._col0=RS_22._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 Select Operator [SEL_11] (rows=18263 width=1119) Output:["_col0"] - Filter Operator [FIL_77] (rows=18263 width=1119) + Filter Operator [FIL_76] (rows=18263 width=1119) predicate:((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dow"] @@ -184,7 +184,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_8] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_76] (rows=575995635 width=88) + Filter Operator [FIL_75] (rows=575995635 width=88) predicate:(ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null 
and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_6] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_coupon_amt","ss_net_profit"] diff --git ql/src/test/results/clientpositive/perf/tez/query47.q.out ql/src/test/results/clientpositive/perf/tez/query47.q.out index a042691..ba68d7e 100644 --- ql/src/test/results/clientpositive/perf/tez/query47.q.out +++ ql/src/test/results/clientpositive/perf/tez/query47.q.out @@ -138,80 +138,78 @@ Stage-0 Select Operator [SEL_66] (rows=63887519 width=88) Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Filter Operator [FIL_170] (rows=63887519 width=88) - predicate:((_col0 > 0) and (_col5 = 2000) and rank_window_1 is not null) + predicate:((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) PTF Operator [PTF_65] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST, _col6 ASC NULLS FIRST","partition by:":"_col1, _col2, _col3, _col4"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col4, _col3, _col5, _col6"}] Select Operator [SEL_64] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_63] - PartitionCols:_col0, _col1, _col2, _col3 + PartitionCols:_col3, _col2, _col4, _col5 Select Operator [SEL_62] (rows=383325119 width=88) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] PTF Operator [PTF_61] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST","partition by:":"_col0, _col1, _col2, _col3, _col4"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col0 ASC NULLS FIRST","partition by:":"_col3, _col2, _col4, _col5, _col0"}] Select Operator [SEL_60] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_59] - PartitionCols:_col0, _col1, _col2, _col3, _col4 - Select Operator [SEL_95] (rows=383325119 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_94] (rows=383325119 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_93] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_92] (rows=766650239 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col8, _col9, _col11, _col12 - Merge Join Operator [MERGEJOIN_188] (rows=766650239 width=88) - Conds:RS_88._col2=RS_89._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col11","_col12"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_89] - PartitionCols:_col0 - Select Operator [SEL_81] (rows=1704 width=1910) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_179] (rows=1704 width=1910) - predicate:(s_company_name is not null and s_store_name is not null and s_store_sk 
is not null) - TableScan [TS_79] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_88] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_187] (rows=696954748 width=88) - Conds:RS_85._col1=RS_86._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_86] - PartitionCols:_col0 - Select Operator [SEL_78] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_178] (rows=462000 width=1436) - predicate:(i_brand is not null and i_category is not null and i_item_sk is not null) - TableScan [TS_76] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_85] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_186] (rows=633595212 width=88) - Conds:RS_82._col0=RS_83._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_82] - PartitionCols:_col0 - Select Operator [SEL_72] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_176] (rows=575995635 width=88) - predicate:(ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_70] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_83] - PartitionCols:_col0 - Select Operator [SEL_75] (rows=73048 width=1119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_177] (rows=73048 width=1119) - predicate:(((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) - TableScan [TS_73] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + PartitionCols:_col3, _col2, _col4, _col5, _col0 + Group By Operator [GBY_94] (rows=383325119 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_92] (rows=766650239 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col8, _col9, _col11, _col12 + Merge Join Operator [MERGEJOIN_188] (rows=766650239 width=88) + Conds:RS_88._col2=RS_89._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col11","_col12"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_89] + PartitionCols:_col0 + Select Operator [SEL_81] (rows=1704 width=1910) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_179] (rows=1704 width=1910) + predicate:(s_company_name is not null and s_store_name is not null and s_store_sk is not null) + TableScan [TS_79] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_88] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_187] (rows=696954748 width=88) + Conds:RS_85._col1=RS_86._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_86] + PartitionCols:_col0 + Select Operator [SEL_78] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"] + Filter 
Operator [FIL_178] (rows=462000 width=1436) + predicate:(i_brand is not null and i_category is not null and i_item_sk is not null) + TableScan [TS_76] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_85] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_186] (rows=633595212 width=88) + Conds:RS_82._col0=RS_83._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_82] + PartitionCols:_col0 + Select Operator [SEL_72] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_176] (rows=575995635 width=88) + predicate:(ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_70] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_83] + PartitionCols:_col0 + Select Operator [SEL_75] (rows=73048 width=1119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_177] (rows=73048 width=1119) + predicate:(((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) + TableScan [TS_73] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_105] PartitionCols:_col0, _col1, _col2, _col3, (_col5 - 1) @@ -220,13 +218,13 @@ Stage-0 Filter Operator [FIL_175] (rows=383325119 width=88) predicate:rank_window_0 is not null PTF Operator [PTF_98] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col0, _col1, _col2, _col3"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST","partition by:":"_col3, _col2, _col4, _col5"}] Select Operator [SEL_97] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_96] - PartitionCols:_col0, _col1, _col2, _col3 - Please refer to the previous Select Operator [SEL_95] + PartitionCols:_col3, _col2, _col4, _col5 + Please refer to the previous Group By Operator [GBY_94] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col0, _col1, _col2, _col3, (_col5 + 1) @@ -235,11 +233,11 @@ Stage-0 Filter Operator [FIL_164] (rows=383325119 width=88) predicate:rank_window_0 is not null PTF Operator [PTF_28] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col0, _col1, _col2, _col3"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST","partition by:":"_col3, _col2, _col4, _col5"}] Select Operator [SEL_27] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_26] - PartitionCols:_col0, _col1, _col2, _col3 - Please refer to the previous Select Operator [SEL_95] + PartitionCols:_col3, _col2, _col4, _col5 + Please refer to the previous Group By Operator [GBY_94] diff --git ql/src/test/results/clientpositive/perf/tez/query52.q.out ql/src/test/results/clientpositive/perf/tez/query52.q.out index b14ab53..fba6783 100644 --- 
ql/src/test/results/clientpositive/perf/tez/query52.q.out +++ ql/src/test/results/clientpositive/perf/tez/query52.q.out @@ -53,15 +53,15 @@ Stage-0 limit:-1 Stage-1 Reducer 5 - File Output Operator [FS_25] - Select Operator [SEL_24] (rows=100 width=88) + File Output Operator [FS_24] + Select Operator [SEL_23] (rows=100 width=88) Output:["_col0","_col1","_col2","_col3"] - Limit [LIM_23] (rows=100 width=88) + Limit [LIM_22] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_22] (rows=348477374 width=88) + Select Operator [SEL_21] (rows=348477374 width=88) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_21] + SHUFFLE [RS_20] Group By Operator [GBY_18] (rows=348477374 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] @@ -69,28 +69,28 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_16] (rows=696954748 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col7, _col8 - Merge Join Operator [MERGEJOIN_35] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_34] (rows=696954748 width=88) Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col2","_col7","_col8"] <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col0 Select Operator [SEL_8] (rows=231000 width=1436) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_33] (rows=231000 width=1436) + Filter Operator [FIL_32] (rows=231000 width=1436) predicate:((i_manager_id = 1) and i_item_sk is not null) TableScan [TS_6] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_34] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_33] (rows=633595212 width=88) Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col2"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_9] PartitionCols:_col0 Select Operator [SEL_2] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_31] (rows=575995635 width=88) + Filter Operator [FIL_30] (rows=575995635 width=88) predicate:(ss_item_sk is not null and ss_sold_date_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] @@ -99,7 +99,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_5] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_32] (rows=18262 width=1119) + Filter Operator [FIL_31] (rows=18262 width=1119) predicate:((d_moy = 12) and (d_year = 1998) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,dt,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] diff --git ql/src/test/results/clientpositive/perf/tez/query55.q.out ql/src/test/results/clientpositive/perf/tez/query55.q.out index 5a5e33a..c9c6fa2 100644 --- ql/src/test/results/clientpositive/perf/tez/query55.q.out +++ ql/src/test/results/clientpositive/perf/tez/query55.q.out @@ -37,13 +37,13 @@ Stage-0 limit:-1 Stage-1 Reducer 5 - File Output Operator [FS_25] - Limit [LIM_23] (rows=100 width=88) + File Output Operator [FS_24] + Limit [LIM_22] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_22] (rows=348477374 width=88) + Select Operator [SEL_21] (rows=348477374 width=88) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_21] + SHUFFLE [RS_20] Select Operator [SEL_19] (rows=348477374 width=88) 
Output:["_col1","_col2","_col3"] Group By Operator [GBY_18] (rows=348477374 width=88) @@ -53,28 +53,28 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_16] (rows=696954748 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col7, _col8 - Merge Join Operator [MERGEJOIN_35] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_34] (rows=696954748 width=88) Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col2","_col7","_col8"] <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col0 Select Operator [SEL_8] (rows=231000 width=1436) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_33] (rows=231000 width=1436) + Filter Operator [FIL_32] (rows=231000 width=1436) predicate:((i_manager_id = 36) and i_item_sk is not null) TableScan [TS_6] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_34] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_33] (rows=633595212 width=88) Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col2"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_9] PartitionCols:_col0 Select Operator [SEL_2] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_31] (rows=575995635 width=88) + Filter Operator [FIL_30] (rows=575995635 width=88) predicate:(ss_item_sk is not null and ss_sold_date_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] @@ -83,7 +83,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_5] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_32] (rows=18262 width=1119) + Filter Operator [FIL_31] (rows=18262 width=1119) predicate:((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] diff --git ql/src/test/results/clientpositive/perf/tez/query57.q.out ql/src/test/results/clientpositive/perf/tez/query57.q.out index 985d588..3c06fde 100644 --- ql/src/test/results/clientpositive/perf/tez/query57.q.out +++ ql/src/test/results/clientpositive/perf/tez/query57.q.out @@ -132,80 +132,78 @@ Stage-0 Select Operator [SEL_66] (rows=31942874 width=135) Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] Filter Operator [FIL_170] (rows=31942874 width=135) - predicate:((_col0 > 0) and (_col4 = 2000) and rank_window_1 is not null) + predicate:((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) PTF Operator [PTF_65] (rows=191657247 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col1, _col2, _col3"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col5, _col4, _col3"}] Select Operator [SEL_64] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_63] - PartitionCols:_col0, _col1, _col2 + PartitionCols:_col4, _col3, _col2 Select Operator [SEL_62] (rows=191657247 width=135) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5"] PTF Operator [PTF_61] (rows=191657247 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS 
FIRST, _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST","partition by:":"_col0, _col1, _col2, _col3"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST","partition by:":"_col4, _col3, _col2, _col0"}] Select Operator [SEL_60] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_59] - PartitionCols:_col0, _col1, _col2, _col3 - Select Operator [SEL_95] (rows=191657247 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_94] (rows=191657247 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_93] - PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_92] (rows=383314495 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col8, _col10, _col11 - Merge Join Operator [MERGEJOIN_188] (rows=383314495 width=135) - Conds:RS_88._col2=RS_89._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10","_col11"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_89] - PartitionCols:_col0 - Select Operator [SEL_81] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_179] (rows=462000 width=1436) - predicate:(i_brand is not null and i_category is not null and i_item_sk is not null) - TableScan [TS_79] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_category"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_88] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_187] (rows=348467716 width=135) - Conds:RS_85._col1=RS_86._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_86] - PartitionCols:_col0 - Select Operator [SEL_78] (rows=60 width=2045) - Output:["_col0","_col1"] - Filter Operator [FIL_178] (rows=60 width=2045) - predicate:(cc_call_center_sk is not null and cc_name is not null) - TableScan [TS_76] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_name"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_85] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_186] (rows=316788826 width=135) - Conds:RS_82._col0=RS_83._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_82] - PartitionCols:_col0 - Select Operator [SEL_72] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_176] (rows=287989836 width=135) - predicate:(cs_call_center_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_70] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_item_sk","cs_sales_price"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_83] - PartitionCols:_col0 - Select Operator [SEL_75] (rows=73048 width=1119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_177] (rows=73048 width=1119) - predicate:(((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) - TableScan [TS_73] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + PartitionCols:_col4, _col3, 
_col2, _col0 + Group By Operator [GBY_94] (rows=191657247 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_92] (rows=383314495 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col8, _col10, _col11 + Merge Join Operator [MERGEJOIN_188] (rows=383314495 width=135) + Conds:RS_88._col2=RS_89._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10","_col11"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_89] + PartitionCols:_col0 + Select Operator [SEL_81] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_179] (rows=462000 width=1436) + predicate:(i_brand is not null and i_category is not null and i_item_sk is not null) + TableScan [TS_79] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_category"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_88] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_187] (rows=348467716 width=135) + Conds:RS_85._col1=RS_86._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_86] + PartitionCols:_col0 + Select Operator [SEL_78] (rows=60 width=2045) + Output:["_col0","_col1"] + Filter Operator [FIL_178] (rows=60 width=2045) + predicate:(cc_call_center_sk is not null and cc_name is not null) + TableScan [TS_76] (rows=60 width=2045) + default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_name"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_85] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_186] (rows=316788826 width=135) + Conds:RS_82._col0=RS_83._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_82] + PartitionCols:_col0 + Select Operator [SEL_72] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_176] (rows=287989836 width=135) + predicate:(cs_call_center_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_70] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_item_sk","cs_sales_price"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_83] + PartitionCols:_col0 + Select Operator [SEL_75] (rows=73048 width=1119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_177] (rows=73048 width=1119) + predicate:(((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) + TableScan [TS_73] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_105] PartitionCols:_col0, _col1, _col2, (_col4 - 1) @@ -214,13 +212,13 @@ Stage-0 Filter Operator [FIL_175] (rows=191657247 width=135) predicate:rank_window_0 is not null PTF Operator [PTF_98] (rows=191657247 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST","partition by:":"_col0, _col1, _col2"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST","partition by:":"_col4, _col3, _col2"}] Select Operator [SEL_97] (rows=191657247 width=135) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_96] - PartitionCols:_col0, _col1, _col2 - Please refer to the previous Select Operator [SEL_95] + PartitionCols:_col4, _col3, _col2 + Please refer to the previous Group By Operator [GBY_94] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col0, _col1, _col2, (_col4 + 1) @@ -229,11 +227,11 @@ Stage-0 Filter Operator [FIL_164] (rows=191657247 width=135) predicate:rank_window_0 is not null PTF Operator [PTF_28] (rows=191657247 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST","partition by:":"_col0, _col1, _col2"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST","partition by:":"_col4, _col3, _col2"}] Select Operator [SEL_27] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_26] - PartitionCols:_col0, _col1, _col2 - Please refer to the previous Select Operator [SEL_95] + PartitionCols:_col4, _col3, _col2 + Please refer to the previous Group By Operator [GBY_94] diff --git ql/src/test/results/clientpositive/perf/tez/query64.q.out ql/src/test/results/clientpositive/perf/tez/query64.q.out index 26ba737..06a289d 100644 --- ql/src/test/results/clientpositive/perf/tez/query64.q.out +++ ql/src/test/results/clientpositive/perf/tez/query64.q.out @@ -274,53 +274,53 @@ Stage-0 limit:-1 Stage-1 Reducer 11 - File Output Operator [FS_264] - Select Operator [SEL_262] (rows=273897192 width=88) + File Output Operator [FS_262] + Select Operator [SEL_260] (rows=273897192 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_261] - Select Operator [SEL_260] (rows=273897192 width=88) + SHUFFLE [RS_259] + Select Operator [SEL_258] (rows=273897192 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Filter Operator [FIL_259] (rows=273897192 width=88) + Filter Operator [FIL_257] (rows=273897192 width=88) predicate:(_col19 <= _col12) - Merge Join Operator [MERGEJOIN_615] (rows=821691577 width=88) - Conds:RS_256._col2, _col1, _col3=RS_257._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] + Merge Join Operator [MERGEJOIN_613] (rows=821691577 width=88) + Conds:RS_254._col2, _col1, _col3=RS_255._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_257] + SHUFFLE [RS_255] PartitionCols:_col1, _col0, _col2 - Select Operator [SEL_254] (rows=746992327 width=88) + Select Operator [SEL_253] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_253] (rows=746992327 width=88) + Group By Operator [GBY_252] (rows=746992327 width=88) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_252] + SHUFFLE [RS_251] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Group By Operator [GBY_251] (rows=1493984654 width=88) + Group By Operator [GBY_250] (rows=1493984654 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col43)","sum(_col44)","sum(_col45)"],keys:_col28, _col46, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, _col25, _col26, _col49 - Select Operator [SEL_250] (rows=1493984654 width=88) + Select Operator [SEL_249] (rows=1493984654 width=88) Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49"] - Filter Operator [FIL_249] (rows=1493984654 width=88) + Filter Operator [FIL_248] (rows=1493984654 width=88) predicate:(_col56 <> _col19) - Merge Join Operator [MERGEJOIN_614] (rows=1493984654 width=88) - Conds:RS_246._col37=RS_247._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] + Merge Join Operator [MERGEJOIN_612] (rows=1493984654 width=88) + Conds:RS_245._col37=RS_246._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] <-Map 44 [SIMPLE_EDGE] - SHUFFLE [RS_247] + SHUFFLE [RS_246] PartitionCols:_col0 Select Operator [SEL_99] (rows=1861800 width=385) Output:["_col0","_col1"] - Filter Operator [FIL_559] (rows=1861800 width=385) + Filter Operator [FIL_557] (rows=1861800 width=385) predicate:cd_demo_sk is not null TableScan [TS_97] (rows=1861800 width=385) default@customer_demographics,cd1,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status"] <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_246] + SHUFFLE [RS_245] PartitionCols:_col37 - Merge Join Operator [MERGEJOIN_613] (rows=1358167838 width=88) - Conds:RS_243._col0=RS_244._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] + Merge Join Operator [MERGEJOIN_611] (rows=1358167838 width=88) + Conds:RS_242._col0=RS_243._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_243] + SHUFFLE [RS_242] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_584] (rows=128840811 width=860) + Merge Join Operator [MERGEJOIN_582] (rows=128840811 width=860) Conds:RS_112._col1=RS_113._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] <-Map 44 [SIMPLE_EDGE] SHUFFLE [RS_113] @@ -329,33 +329,33 @@ Stage-0 <-Reducer 5 [SIMPLE_EDGE] 
SHUFFLE [RS_112] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_583] (rows=117128008 width=860) + Merge Join Operator [MERGEJOIN_581] (rows=117128008 width=860) Conds:RS_109._col3=RS_110._col0(Inner),Output:["_col0","_col1","_col7","_col9","_col14","_col15","_col16","_col17"] <-Map 33 [SIMPLE_EDGE] SHUFFLE [RS_110] PartitionCols:_col0 Select Operator [SEL_21] (rows=40000000 width=1014) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_546] (rows=40000000 width=1014) + Filter Operator [FIL_544] (rows=40000000 width=1014) predicate:ca_address_sk is not null TableScan [TS_19] (rows=40000000 width=1014) default@customer_address,ad2,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_582] (rows=106480005 width=860) + Merge Join Operator [MERGEJOIN_580] (rows=106480005 width=860) Conds:RS_106._col2=RS_107._col0(Inner),Output:["_col0","_col1","_col3","_col7","_col9"] <-Reducer 31 [SIMPLE_EDGE] SHUFFLE [RS_107] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_581] (rows=7920 width=107) + Merge Join Operator [MERGEJOIN_579] (rows=7920 width=107) Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col0"] <-Map 30 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 Select Operator [SEL_11] (rows=7200 width=107) Output:["_col0","_col1"] - Filter Operator [FIL_544] (rows=7200 width=107) + Filter Operator [FIL_542] (rows=7200 width=107) predicate:(hd_demo_sk is not null and hd_income_band_sk is not null) TableScan [TS_9] (rows=7200 width=107) default@household_demographics,hd2,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_income_band_sk"] @@ -364,28 +364,28 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_14] (rows=20 width=12) Output:["_col0"] - Filter Operator [FIL_545] (rows=20 width=12) + Filter Operator [FIL_543] (rows=20 width=12) predicate:ib_income_band_sk is not null TableScan [TS_12] (rows=20 width=12) default@income_band,ib2,Tbl:COMPLETE,Col:NONE,Output:["ib_income_band_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_106] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_580] (rows=96800003 width=860) + Merge Join Operator [MERGEJOIN_578] (rows=96800003 width=860) Conds:RS_103._col4=RS_104._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col7","_col9"] <-Map 15 [SIMPLE_EDGE] SHUFFLE [RS_104] PartitionCols:_col0 Select Operator [SEL_5] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_542] (rows=73049 width=1119) + Filter Operator [FIL_540] (rows=73049 width=1119) predicate:d_date_sk is not null TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_579] (rows=88000001 width=860) + Merge Join Operator [MERGEJOIN_577] (rows=88000001 width=860) Conds:RS_100._col5=RS_101._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7"] <-Map 15 [SIMPLE_EDGE] SHUFFLE [RS_101] @@ -396,56 +396,56 @@ Stage-0 PartitionCols:_col5 Select Operator [SEL_2] (rows=80000000 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_541] (rows=80000000 width=860) + Filter Operator [FIL_539] (rows=80000000 width=860) predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null and c_first_sales_date_sk is not null and c_first_shipto_date_sk is not 
null) TableScan [TS_0] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_244] + SHUFFLE [RS_243] PartitionCols:_col16 - Select Operator [SEL_224] (rows=1234698008 width=88) + Select Operator [SEL_223] (rows=1234698008 width=88) Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col16","_col17","_col23","_col24","_col25","_col26","_col29"] - Merge Join Operator [MERGEJOIN_612] (rows=1234698008 width=88) - Conds:RS_221._col5, _col12=RS_222._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + Merge Join Operator [MERGEJOIN_610] (rows=1234698008 width=88) + Conds:RS_220._col5, _col12=RS_221._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] <-Map 43 [SIMPLE_EDGE] - SHUFFLE [RS_222] + SHUFFLE [RS_221] PartitionCols:_col0, _col1 Select Operator [SEL_77] (rows=57591150 width=77) Output:["_col0","_col1"] - Filter Operator [FIL_558] (rows=57591150 width=77) + Filter Operator [FIL_556] (rows=57591150 width=77) predicate:(sr_item_sk is not null and sr_ticket_number is not null) TableScan [TS_75] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_221] + SHUFFLE [RS_220] PartitionCols:_col5, _col12 - Merge Join Operator [MERGEJOIN_611] (rows=1122452711 width=88) - Conds:RS_218._col9=RS_219._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + Merge Join Operator [MERGEJOIN_609] (rows=1122452711 width=88) + Conds:RS_217._col9=RS_218._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] <-Map 33 [SIMPLE_EDGE] - SHUFFLE [RS_219] + SHUFFLE [RS_218] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_21] <-Reducer 27 [SIMPLE_EDGE] - SHUFFLE [RS_218] + SHUFFLE [RS_217] PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_610] (rows=1020411534 width=88) - Conds:RS_215._col10=RS_216._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] + Merge Join Operator [MERGEJOIN_608] (rows=1020411534 width=88) + Conds:RS_214._col10=RS_215._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] <-Map 42 [SIMPLE_EDGE] - SHUFFLE [RS_216] + SHUFFLE [RS_215] PartitionCols:_col0 Select Operator [SEL_71] (rows=1704 width=1910) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_556] (rows=1704 width=1910) + Filter Operator [FIL_554] (rows=1704 width=1910) predicate:(s_store_name is not null and s_store_sk is not null and s_zip is not null) TableScan [TS_69] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_zip"] <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_215] + SHUFFLE [RS_214] PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_609] (rows=927646829 width=88) - Conds:RS_212._col5=RS_213._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] + Merge Join 
Operator [MERGEJOIN_607] (rows=927646829 width=88) + Conds:RS_211._col5=RS_212._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] <-Reducer 40 [ONE_TO_ONE_EDGE] - FORWARD [RS_213] + FORWARD [RS_212] PartitionCols:_col0 Select Operator [SEL_68] (rows=52798137 width=135) Output:["_col0"] @@ -460,14 +460,14 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 Select Operator [SEL_62] (rows=316788826 width=135) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_589] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_587] (rows=316788826 width=135) Conds:RS_59._col0, _col1=RS_60._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] <-Map 38 [SIMPLE_EDGE] SHUFFLE [RS_59] PartitionCols:_col0, _col1 Select Operator [SEL_55] (rows=287989836 width=135) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_554] (rows=287989836 width=135) + Filter Operator [FIL_552] (rows=287989836 width=135) predicate:(cs_item_sk is not null and cs_order_number is not null) TableScan [TS_53] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] @@ -476,59 +476,59 @@ Stage-0 PartitionCols:_col0, _col1 Select Operator [SEL_58] (rows=28798881 width=106) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_555] (rows=28798881 width=106) + Filter Operator [FIL_553] (rows=28798881 width=106) predicate:(cr_item_sk is not null and cr_order_number is not null) TableScan [TS_56] (rows=28798881 width=106) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_212] + SHUFFLE [RS_211] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_608] (rows=843315281 width=88) - Conds:RS_209._col0=RS_210._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] + Merge Join Operator [MERGEJOIN_606] (rows=843315281 width=88) + Conds:RS_208._col0=RS_209._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_209] + SHUFFLE [RS_208] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_581] + Please refer to the previous Merge Join Operator [MERGEJOIN_579] <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_210] + SHUFFLE [RS_209] PartitionCols:_col5 - Select Operator [SEL_180] (rows=766650239 width=88) + Select Operator [SEL_179] (rows=766650239 width=88) Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col16"] - Merge Join Operator [MERGEJOIN_606] (rows=766650239 width=88) - Conds:RS_177._col7=RS_178._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] + Merge Join Operator [MERGEJOIN_604] (rows=766650239 width=88) + Conds:RS_176._col7=RS_177._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] <-Map 37 [SIMPLE_EDGE] - SHUFFLE [RS_178] + SHUFFLE [RS_177] PartitionCols:_col0 Select Operator [SEL_42] (rows=2300 width=1179) Output:["_col0"] - Filter Operator [FIL_553] (rows=2300 width=1179) + Filter Operator [FIL_551] (rows=2300 width=1179) predicate:p_promo_sk is not null TableScan 
[TS_40] (rows=2300 width=1179) default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk"] <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_177] + SHUFFLE [RS_176] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_605] (rows=696954748 width=88) - Conds:RS_174._col0=RS_175._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + Merge Join Operator [MERGEJOIN_603] (rows=696954748 width=88) + Conds:RS_173._col0=RS_174._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] <-Map 15 [SIMPLE_EDGE] - SHUFFLE [RS_175] + SHUFFLE [RS_174] PartitionCols:_col0 - Select Operator [SEL_167] (rows=36524 width=1119) + Select Operator [SEL_166] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_571] (rows=36524 width=1119) + Filter Operator [FIL_569] (rows=36524 width=1119) predicate:((d_year = 2001) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Reducer 35 [SIMPLE_EDGE] - SHUFFLE [RS_174] + SHUFFLE [RS_173] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_586] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_584] (rows=633595212 width=88) Conds:RS_43._col1=RS_44._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] <-Map 34 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col1 Select Operator [SEL_33] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_550] (rows=575995635 width=88) + Filter Operator [FIL_548] (rows=575995635 width=88) predicate:(ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_31] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] @@ -537,12 +537,12 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_36] (rows=2851 width=1436) Output:["_col0","_col3"] - Filter Operator [FIL_551] (rows=2851 width=1436) + Filter Operator [FIL_549] (rows=2851 width=1436) predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50 and i_item_sk is not null) TableScan [TS_34] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_256] + SHUFFLE [RS_254] PartitionCols:_col2, _col1, _col3 Select Operator [SEL_126] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] @@ -557,7 +557,7 @@ Stage-0 Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49"] Filter Operator [FIL_121] (rows=1493984654 width=88) predicate:(_col56 <> _col19) - Merge Join Operator [MERGEJOIN_596] (rows=1493984654 width=88) + Merge Join Operator [MERGEJOIN_594] (rows=1493984654 
width=88) Conds:RS_118._col37=RS_119._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] <-Map 44 [SIMPLE_EDGE] SHUFFLE [RS_119] @@ -566,18 +566,18 @@ Stage-0 <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_118] PartitionCols:_col37 - Merge Join Operator [MERGEJOIN_595] (rows=1358167838 width=88) + Merge Join Operator [MERGEJOIN_593] (rows=1358167838 width=88) Conds:RS_115._col0=RS_116._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_115] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_584] + Please refer to the previous Merge Join Operator [MERGEJOIN_582] <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_116] PartitionCols:_col16 Select Operator [SEL_96] (rows=1234698008 width=88) Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col16","_col17","_col23","_col24","_col25","_col26","_col29"] - Merge Join Operator [MERGEJOIN_594] (rows=1234698008 width=88) + Merge Join Operator [MERGEJOIN_592] (rows=1234698008 width=88) Conds:RS_93._col5, _col12=RS_94._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] <-Map 43 [SIMPLE_EDGE] SHUFFLE [RS_94] @@ -586,7 +586,7 @@ Stage-0 <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_93] PartitionCols:_col5, _col12 - Merge Join Operator [MERGEJOIN_593] (rows=1122452711 width=88) + Merge Join Operator [MERGEJOIN_591] (rows=1122452711 width=88) Conds:RS_90._col9=RS_91._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] <-Map 33 [SIMPLE_EDGE] SHUFFLE [RS_91] @@ -595,7 +595,7 @@ Stage-0 <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_90] PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_592] (rows=1020411534 width=88) + Merge Join Operator [MERGEJOIN_590] (rows=1020411534 width=88) Conds:RS_87._col10=RS_88._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] <-Map 42 [SIMPLE_EDGE] SHUFFLE [RS_88] @@ -604,7 +604,7 @@ Stage-0 <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_87] PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_591] (rows=927646829 width=88) + Merge Join Operator [MERGEJOIN_589] (rows=927646829 width=88) Conds:RS_84._col5=RS_85._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] <-Reducer 40 [ONE_TO_ONE_EDGE] FORWARD [RS_85] @@ -613,18 +613,18 @@ Stage-0 <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_84] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_590] (rows=843315281 width=88) + Merge Join Operator [MERGEJOIN_588] (rows=843315281 width=88) Conds:RS_81._col0=RS_82._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] <-Reducer 31 [SIMPLE_EDGE] SHUFFLE [RS_81] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_581] + Please refer to the previous Merge Join Operator [MERGEJOIN_579] <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_82] PartitionCols:_col5 Select Operator [SEL_52] (rows=766650239 width=88) Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col16"] - Merge Join 
Operator [MERGEJOIN_588] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_586] (rows=766650239 width=88) Conds:RS_49._col7=RS_50._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] <-Map 37 [SIMPLE_EDGE] SHUFFLE [RS_50] @@ -633,18 +633,18 @@ Stage-0 <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_587] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_585] (rows=696954748 width=88) Conds:RS_46._col0=RS_47._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] <-Map 15 [SIMPLE_EDGE] SHUFFLE [RS_47] PartitionCols:_col0 Select Operator [SEL_39] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_552] (rows=36524 width=1119) + Filter Operator [FIL_550] (rows=36524 width=1119) predicate:((d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Reducer 35 [SIMPLE_EDGE] SHUFFLE [RS_46] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_586] + Please refer to the previous Merge Join Operator [MERGEJOIN_584] diff --git ql/src/test/results/clientpositive/perf/tez/query68.q.out ql/src/test/results/clientpositive/perf/tez/query68.q.out index e8f00ff..1634063 100644 --- ql/src/test/results/clientpositive/perf/tez/query68.q.out +++ ql/src/test/results/clientpositive/perf/tez/query68.q.out @@ -97,44 +97,44 @@ Stage-0 limit:100 Stage-1 Reducer 4 - File Output Operator [FS_50] - Limit [LIM_49] (rows=100 width=88) + File Output Operator [FS_49] + Limit [LIM_48] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_48] (rows=463823414 width=88) + Select Operator [SEL_47] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_47] - Select Operator [SEL_46] (rows=463823414 width=88) + SHUFFLE [RS_46] + Select Operator [SEL_45] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_45] (rows=463823414 width=88) + Filter Operator [FIL_44] (rows=463823414 width=88) predicate:(_col5 <> _col8) - Merge Join Operator [MERGEJOIN_86] (rows=463823414 width=88) - Conds:RS_42._col0=RS_43._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_85] (rows=463823414 width=88) + Conds:RS_41._col0=RS_42._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_42] + SHUFFLE [RS_41] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_81] (rows=88000001 width=860) - Conds:RS_39._col1=RS_40._col0(Inner),Output:["_col0","_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_80] (rows=88000001 width=860) + Conds:RS_38._col1=RS_39._col0(Inner),Output:["_col0","_col2","_col3","_col5"] <-Map 5 [SIMPLE_EDGE] - SHUFFLE [RS_40] + SHUFFLE [RS_39] PartitionCols:_col0 Select Operator [SEL_5] (rows=40000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_75] (rows=40000000 width=1014) + Filter Operator [FIL_74] (rows=40000000 width=1014) predicate:ca_address_sk is not null TableScan [TS_3] (rows=40000000 width=1014) default@customer_address,current_addr,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_city"] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_39] + SHUFFLE [RS_38] PartitionCols:_col1 Select Operator [SEL_2] (rows=80000000 width=860) 
Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_74] (rows=80000000 width=860) + Filter Operator [FIL_73] (rows=80000000 width=860) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) TableScan [TS_0] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name"] <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_43] + SHUFFLE [RS_42] PartitionCols:_col1 Select Operator [SEL_37] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] @@ -145,7 +145,7 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_34] (rows=843315281 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col1, _col18, _col3, _col5 - Merge Join Operator [MERGEJOIN_85] (rows=843315281 width=88) + Merge Join Operator [MERGEJOIN_84] (rows=843315281 width=88) Conds:RS_30._col3=RS_31._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col18"] <-Map 5 [SIMPLE_EDGE] SHUFFLE [RS_31] @@ -154,42 +154,42 @@ Stage-0 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_84] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_83] (rows=766650239 width=88) Conds:RS_27._col2=RS_28._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8"] <-Map 14 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 Select Operator [SEL_17] (rows=7200 width=107) Output:["_col0"] - Filter Operator [FIL_79] (rows=7200 width=107) + Filter Operator [FIL_78] (rows=7200 width=107) predicate:(((hd_dep_count = 2) or (hd_vehicle_count = 1)) and hd_demo_sk is not null) TableScan [TS_15] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_83] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_82] (rows=696954748 width=88) Conds:RS_24._col4=RS_25._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] <-Map 13 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0 Select Operator [SEL_14] (rows=852 width=1910) Output:["_col0"] - Filter Operator [FIL_78] (rows=852 width=1910) + Filter Operator [FIL_77] (rows=852 width=1910) predicate:((s_city) IN ('Cedar Grove', 'Wildwood') and s_store_sk is not null) TableScan [TS_12] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_city"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_82] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_81] (rows=633595212 width=88) Conds:RS_21._col0=RS_22._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 Select Operator [SEL_11] (rows=4058 width=1119) Output:["_col0"] - Filter Operator [FIL_77] (rows=4058 width=1119) + Filter Operator [FIL_76] (rows=4058 width=1119) predicate:((d_year) IN (1998, 1999, 2000) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"] @@ -198,7 +198,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_8] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator 
[FIL_76] (rows=575995635 width=88) + Filter Operator [FIL_75] (rows=575995635 width=88) predicate:(ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_6] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_ext_sales_price","ss_ext_list_price","ss_ext_tax"] diff --git ql/src/test/results/clientpositive/perf/tez/query71.q.out ql/src/test/results/clientpositive/perf/tez/query71.q.out index 6635b2e..c07580e 100644 --- ql/src/test/results/clientpositive/perf/tez/query71.q.out +++ ql/src/test/results/clientpositive/perf/tez/query71.q.out @@ -90,11 +90,11 @@ Stage-0 limit:-1 Stage-1 Reducer 7 - File Output Operator [FS_53] - Select Operator [SEL_51] (rows=670816149 width=108) + File Output Operator [FS_52] + Select Operator [SEL_50] (rows=670816149 width=108) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_50] + SHUFFLE [RS_49] Select Operator [SEL_48] (rows=670816149 width=108) Output:["_col1","_col2","_col3","_col4","_col5"] Group By Operator [GBY_47] (rows=670816149 width=108) @@ -104,28 +104,28 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_45] (rows=1341632299 width=108) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col0)"],keys:_col4, _col8, _col9, _col5 - Merge Join Operator [MERGEJOIN_87] (rows=1341632299 width=108) + Merge Join Operator [MERGEJOIN_86] (rows=1341632299 width=108) Conds:RS_41._col2=RS_42._col0(Inner),Output:["_col0","_col4","_col5","_col8","_col9"] <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col0 Select Operator [SEL_37] (rows=86400 width=471) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_82] (rows=86400 width=471) + Filter Operator [FIL_81] (rows=86400 width=471) predicate:(((t_meal_time = 'breakfast') or (t_meal_time = 'dinner')) and t_time_sk is not null) TableScan [TS_35] (rows=86400 width=471) default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute","t_meal_time"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_86] (rows=1219665700 width=108) + Merge Join Operator [MERGEJOIN_85] (rows=1219665700 width=108) Conds:Union 3._col1=RS_39._col0(Inner),Output:["_col0","_col2","_col4","_col5"] <-Map 15 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col0 Select Operator [SEL_34] (rows=231000 width=1436) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_81] (rows=231000 width=1436) + Filter Operator [FIL_80] (rows=231000 width=1436) predicate:((i_manager_id = 1) and i_item_sk is not null) TableScan [TS_32] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] @@ -135,14 +135,14 @@ Stage-0 PartitionCols:_col1 Select Operator [SEL_19] (rows=316788826 width=135) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_84] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_83] (rows=316788826 width=135) Conds:RS_16._col0=RS_17._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 Select Operator [SEL_15] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_78] (rows=18262 width=1119) + Filter Operator [FIL_77] (rows=18262 width=1119) predicate:((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) 
TableScan [TS_13] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] @@ -151,7 +151,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_12] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_77] (rows=287989836 width=135) + Filter Operator [FIL_76] (rows=287989836 width=135) predicate:(cs_item_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null) TableScan [TS_10] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_item_sk","cs_ext_sales_price"] @@ -160,14 +160,14 @@ Stage-0 PartitionCols:_col1 Select Operator [SEL_30] (rows=633595212 width=88) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_85] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_84] (rows=633595212 width=88) Conds:RS_27._col0=RS_28._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col0 Select Operator [SEL_23] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_79] (rows=575995635 width=88) + Filter Operator [FIL_78] (rows=575995635 width=88) predicate:(ss_item_sk is not null and ss_sold_date_sk is not null and ss_sold_time_sk is not null) TableScan [TS_21] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_sold_time_sk","ss_item_sk","ss_ext_sales_price"] @@ -176,7 +176,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_26] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_80] (rows=18262 width=1119) + Filter Operator [FIL_79] (rows=18262 width=1119) predicate:((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) TableScan [TS_24] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] @@ -185,14 +185,14 @@ Stage-0 PartitionCols:_col1 Select Operator [SEL_9] (rows=158402938 width=135) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_83] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_82] (rows=158402938 width=135) Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_6] PartitionCols:_col0 Select Operator [SEL_2] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_75] (rows=144002668 width=135) + Filter Operator [FIL_74] (rows=144002668 width=135) predicate:(ws_item_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null) TableScan [TS_0] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_item_sk","ws_ext_sales_price"] @@ -201,7 +201,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_5] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_76] (rows=18262 width=1119) + Filter Operator [FIL_75] (rows=18262 width=1119) predicate:((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] diff --git ql/src/test/results/clientpositive/perf/tez/query78.q.out ql/src/test/results/clientpositive/perf/tez/query78.q.out index 9ec4739..7cf4200 100644 --- ql/src/test/results/clientpositive/perf/tez/query78.q.out +++ ql/src/test/results/clientpositive/perf/tez/query78.q.out @@ -133,21 +133,21 @@ Stage-0 limit:100 Stage-1 Reducer 6 - File 
Output Operator [FS_78] - Limit [LIM_77] (rows=100 width=88) + File Output Operator [FS_77] + Limit [LIM_76] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_75] (rows=23425424 width=88) + Select Operator [SEL_74] (rows=23425424 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_74] - Select Operator [SEL_73] (rows=23425424 width=88) + SHUFFLE [RS_73] + Select Operator [SEL_72] (rows=23425424 width=88) Output:["_col0","_col1","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Filter Operator [FIL_72] (rows=23425424 width=88) + Filter Operator [FIL_71] (rows=23425424 width=88) predicate:(COALESCE(_col11,0) > 0) - Merge Join Operator [MERGEJOIN_113] (rows=70276272 width=88) - Conds:RS_69._col1=RS_70._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_112] (rows=70276272 width=88) + Conds:RS_68._col1=RS_69._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col11","_col12","_col13"] <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_70] + SHUFFLE [RS_69] PartitionCols:_col0 Select Operator [SEL_67] (rows=43558464 width=135) Output:["_col0","_col1","_col2","_col3"] @@ -158,14 +158,14 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_64] (rows=87116928 width=135) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col3, _col4 - Merge Join Operator [MERGEJOIN_111] (rows=87116928 width=135) + Merge Join Operator [MERGEJOIN_110] (rows=87116928 width=135) Conds:RS_60._col0=RS_61._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_60] PartitionCols:_col0 Select Operator [SEL_2] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_97] (rows=36524 width=1119) + Filter Operator [FIL_96] (rows=36524 width=1119) predicate:((d_year = 2000) and d_date_sk is not null) TableScan [TS_0] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] @@ -176,14 +176,14 @@ Stage-0 Output:["_col0","_col1","_col2","_col4","_col5","_col6"] Filter Operator [FIL_58] (rows=79197206 width=135) predicate:_col8 is null - Merge Join Operator [MERGEJOIN_108] (rows=158394413 width=135) + Merge Join Operator [MERGEJOIN_107] (rows=158394413 width=135) Conds:RS_55._col2, _col3=RS_56._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] <-Map 17 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col2, _col3 Select Operator [SEL_52] (rows=143994918 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_104] (rows=143994918 width=135) + Filter Operator [FIL_103] (rows=143994918 width=135) predicate:((cs_item_sk = cs_item_sk) and cs_sold_date_sk is not null) TableScan [TS_50] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_order_number","cs_quantity","cs_wholesale_cost","cs_sales_price"] @@ -195,11 +195,11 @@ Stage-0 TableScan [TS_53] (rows=28798881 width=106) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_69] + SHUFFLE [RS_68] PartitionCols:_col1 Filter Operator [FIL_45] (rows=63887519 width=88) predicate:(COALESCE(_col7,0) > 0) - Merge Join Operator [MERGEJOIN_112] 
(rows=191662559 width=88) + Merge Join Operator [MERGEJOIN_111] (rows=191662559 width=88) Conds:RS_42._col1, _col0=RS_43._col1, _col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9"] <-Reducer 3 [ONE_TO_ONE_EDGE] FORWARD [RS_42] @@ -213,7 +213,7 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_17] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_109] (rows=348477374 width=88) + Merge Join Operator [MERGEJOIN_108] (rows=348477374 width=88) Conds:RS_13._col0=RS_14._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_13] @@ -226,14 +226,14 @@ Stage-0 Output:["_col0","_col1","_col2","_col4","_col5","_col6"] Filter Operator [FIL_11] (rows=316797606 width=88) predicate:_col8 is null - Merge Join Operator [MERGEJOIN_106] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_105] (rows=633595212 width=88) Conds:RS_8._col1, _col3=RS_9._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_8] PartitionCols:_col1, _col3 Select Operator [SEL_5] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_98] (rows=575995635 width=88) + Filter Operator [FIL_97] (rows=575995635 width=88) predicate:ss_sold_date_sk is not null TableScan [TS_3] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_wholesale_cost","ss_sales_price"] @@ -256,7 +256,7 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_38] (rows=87121617 width=135) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_110] (rows=87121617 width=135) + Merge Join Operator [MERGEJOIN_109] (rows=87121617 width=135) Conds:RS_34._col0=RS_35._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_34] @@ -269,14 +269,14 @@ Stage-0 Output:["_col0","_col1","_col2","_col4","_col5","_col6"] Filter Operator [FIL_32] (rows=79201469 width=135) predicate:_col8 is null - Merge Join Operator [MERGEJOIN_107] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_106] (rows=158402938 width=135) Conds:RS_29._col1, _col3=RS_30._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] <-Map 14 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col1, _col3 Select Operator [SEL_26] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_101] (rows=144002668 width=135) + Filter Operator [FIL_100] (rows=144002668 width=135) predicate:ws_sold_date_sk is not null TableScan [TS_24] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_order_number","ws_quantity","ws_wholesale_cost","ws_sales_price"] diff --git ql/src/test/results/clientpositive/perf/tez/query79.q.out ql/src/test/results/clientpositive/perf/tez/query79.q.out index b34a83f..09c71b0 100644 --- ql/src/test/results/clientpositive/perf/tez/query79.q.out +++ ql/src/test/results/clientpositive/perf/tez/query79.q.out @@ -57,28 +57,28 @@ Stage-0 limit:-1 Stage-1 Reducer 3 - File Output Operator [FS_38] - Limit [LIM_36] (rows=100 
width=88) + File Output Operator [FS_37] + Limit [LIM_35] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_35] (rows=421657640 width=88) + Select Operator [SEL_34] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_34] - Select Operator [SEL_33] (rows=421657640 width=88) + SHUFFLE [RS_33] + Select Operator [SEL_32] (rows=421657640 width=88) Output:["_col0","_col1","_col3","_col4","_col5","_col6"] - Merge Join Operator [MERGEJOIN_61] (rows=421657640 width=88) - Conds:RS_30._col0=RS_31._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_60] (rows=421657640 width=88) + Conds:RS_29._col0=RS_30._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_30] + SHUFFLE [RS_29] PartitionCols:_col0 Select Operator [SEL_2] (rows=80000000 width=860) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_53] (rows=80000000 width=860) + Filter Operator [FIL_52] (rows=80000000 width=860) predicate:c_customer_sk is not null TableScan [TS_0] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name"] <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_31] + SHUFFLE [RS_30] PartitionCols:_col1 Select Operator [SEL_28] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] @@ -89,42 +89,42 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_25] (rows=766650239 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","sum(_col7)"],keys:_col1, _col3, _col5, _col13 - Merge Join Operator [MERGEJOIN_60] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_59] (rows=766650239 width=88) Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col13"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 Select Operator [SEL_14] (rows=6000 width=107) Output:["_col0"] - Filter Operator [FIL_57] (rows=6000 width=107) + Filter Operator [FIL_56] (rows=6000 width=107) predicate:(((hd_dep_count = 8) or (hd_vehicle_count > 0)) and hd_demo_sk is not null) TableScan [TS_12] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_59] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_58] (rows=696954748 width=88) Conds:RS_18._col4=RS_19._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col13"] <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_11] (rows=189 width=1910) Output:["_col0","_col2"] - Filter Operator [FIL_56] (rows=189 width=1910) + Filter Operator [FIL_55] (rows=189 width=1910) predicate:(s_number_employees BETWEEN 200 AND 295 and s_store_sk is not null) TableScan [TS_9] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_number_employees","s_city"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_58] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_57] (rows=633595212 width=88) Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 4 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col0 Select Operator [SEL_5] (rows=575995635 width=88) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_54] (rows=575995635 width=88) + Filter Operator [FIL_53] (rows=575995635 width=88) predicate:(ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_3] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_coupon_amt","ss_net_profit"] @@ -133,7 +133,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_8] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_55] (rows=18262 width=1119) + Filter Operator [FIL_54] (rows=18262 width=1119) predicate:((d_dow = 1) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dow"] diff --git ql/src/test/results/clientpositive/perf/tez/query87.q.out ql/src/test/results/clientpositive/perf/tez/query87.q.out index 19f0e00..0455411 100644 --- ql/src/test/results/clientpositive/perf/tez/query87.q.out +++ ql/src/test/results/clientpositive/perf/tez/query87.q.out @@ -61,143 +61,143 @@ Stage-0 limit:-1 Stage-1 Reducer 9 - File Output Operator [FS_104] - Group By Operator [GBY_102] (rows=1 width=24) + File Output Operator [FS_101] + Group By Operator [GBY_99] (rows=1 width=24) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 8 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_101] - Group By Operator [GBY_100] (rows=1 width=24) + PARTITION_ONLY_SHUFFLE [RS_98] + Group By Operator [GBY_97] (rows=1 width=24) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_98] (rows=4537552 width=129) - Filter Operator [FIL_97] (rows=4537552 width=129) + Select Operator [SEL_95] (rows=4537552 width=129) + Filter Operator [FIL_94] (rows=4537552 width=129) predicate:(((_col3 * 2) = _col4) and (_col3 > 0L)) - Select Operator [SEL_129] (rows=27225312 width=129) + Select Operator [SEL_126] (rows=27225312 width=129) Output:["_col3","_col4"] - Group By Operator [GBY_96] (rows=27225312 width=129) + Group By Operator [GBY_93] (rows=27225312 width=129) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 7 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] - Reduce Output Operator [RS_95] + Reduce Output Operator [RS_92] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_94] (rows=54450625 width=129) + Group By Operator [GBY_91] (rows=54450625 width=129) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_92] (rows=54450625 width=129) + Select Operator [SEL_89] (rows=54450625 width=129) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_89] (rows=43560808 width=135) + Select Operator [SEL_87] (rows=43560808 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_88] (rows=43560808 width=135) + Group By Operator [GBY_86] (rows=43560808 width=135) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_84] (rows=87121617 width=135) + Select Operator [SEL_82] (rows=87121617 width=135) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_83] (rows=87121617 width=135) + Group By Operator [GBY_81] (rows=87121617 width=135) 
Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_82] + SHUFFLE [RS_80] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_81] (rows=174243235 width=135) + Group By Operator [GBY_79] (rows=174243235 width=135) Output:["_col0","_col1","_col2"],keys:_col7, _col6, _col3 - Merge Join Operator [MERGEJOIN_135] (rows=174243235 width=135) - Conds:RS_77._col1=RS_78._col0(Inner),Output:["_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_132] (rows=174243235 width=135) + Conds:RS_75._col1=RS_76._col0(Inner),Output:["_col3","_col6","_col7"] <-Map 17 [SIMPLE_EDGE] - SHUFFLE [RS_78] + SHUFFLE [RS_76] PartitionCols:_col0 Select Operator [SEL_8] (rows=80000000 width=860) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_122] (rows=80000000 width=860) + Filter Operator [FIL_119] (rows=80000000 width=860) predicate:c_customer_sk is not null TableScan [TS_6] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name"] <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_77] + SHUFFLE [RS_75] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_134] (rows=158402938 width=135) - Conds:RS_74._col0=RS_75._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_131] (rows=158402938 width=135) + Conds:RS_72._col0=RS_73._col0(Inner),Output:["_col1","_col3"] <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_75] + SHUFFLE [RS_73] PartitionCols:_col0 Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_121] (rows=8116 width=1119) + Filter Operator [FIL_118] (rows=8116 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_month_seq"] <-Map 19 [SIMPLE_EDGE] - SHUFFLE [RS_74] + SHUFFLE [RS_72] PartitionCols:_col0 - Select Operator [SEL_67] (rows=144002668 width=135) + Select Operator [SEL_65] (rows=144002668 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_126] (rows=144002668 width=135) + Filter Operator [FIL_123] (rows=144002668 width=135) predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_65] (rows=144002668 width=135) + TableScan [TS_63] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] <-Reducer 6 [CONTAINS] - Reduce Output Operator [RS_95] + Reduce Output Operator [RS_92] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_94] (rows=54450625 width=129) + Group By Operator [GBY_91] (rows=54450625 width=129) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_92] (rows=54450625 width=129) + Select Operator [SEL_89] (rows=54450625 width=129) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_64] (rows=10889817 width=103) + Select Operator [SEL_62] (rows=10889817 width=103) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_63] (rows=10889817 width=103) + Group By Operator [GBY_61] (rows=10889817 width=103) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_59] (rows=21779634 width=103) + Select Operator [SEL_57] (rows=21779634 width=103) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_58] (rows=21779634 width=103) + Filter Operator [FIL_56] (rows=21779634 
width=103) predicate:(((_col3 * 2) = _col4) and (_col3 > 0L)) - Group By Operator [GBY_57] (rows=130677808 width=103) + Group By Operator [GBY_55] (rows=130677808 width=103) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 5 [SIMPLE_EDGE] <-Reducer 13 [CONTAINS] - Reduce Output Operator [RS_56] + Reduce Output Operator [RS_54] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_55] (rows=261355616 width=103) + Group By Operator [GBY_53] (rows=261355616 width=103) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_53] (rows=261355616 width=103) + Select Operator [SEL_51] (rows=261355616 width=103) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_50] (rows=87116929 width=135) + Select Operator [SEL_49] (rows=87116929 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_49] (rows=87116929 width=135) + Group By Operator [GBY_48] (rows=87116929 width=135) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_45] (rows=174233858 width=135) + Select Operator [SEL_44] (rows=174233858 width=135) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_44] (rows=174233858 width=135) + Group By Operator [GBY_43] (rows=174233858 width=135) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_43] + SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_42] (rows=348467716 width=135) + Group By Operator [GBY_41] (rows=348467716 width=135) Output:["_col0","_col1","_col2"],keys:_col7, _col6, _col3 - Merge Join Operator [MERGEJOIN_133] (rows=348467716 width=135) - Conds:RS_38._col1=RS_39._col0(Inner),Output:["_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_130] (rows=348467716 width=135) + Conds:RS_37._col1=RS_38._col0(Inner),Output:["_col3","_col6","_col7"] <-Map 17 [SIMPLE_EDGE] - SHUFFLE [RS_39] + SHUFFLE [RS_38] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_8] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_38] + SHUFFLE [RS_37] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_132] (rows=316788826 width=135) - Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_129] (rows=316788826 width=135) + Conds:RS_34._col0=RS_35._col0(Inner),Output:["_col1","_col3"] <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_36] + SHUFFLE [RS_35] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_5] <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_35] + SHUFFLE [RS_34] PartitionCols:_col0 - Select Operator [SEL_28] (rows=287989836 width=135) + Select Operator [SEL_27] (rows=287989836 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_123] (rows=287989836 width=135) + Filter Operator [FIL_120] (rows=287989836 width=135) predicate:(cs_bill_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_26] (rows=287989836 width=135) + TableScan [TS_25] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_56] + Reduce Output Operator [RS_54] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_55] (rows=261355616 width=103) + Group By Operator [GBY_53] (rows=261355616 width=103) 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_53] (rows=261355616 width=103) + Select Operator [SEL_51] (rows=261355616 width=103) Output:["_col0","_col1","_col2","_col3","_col4"] Select Operator [SEL_24] (rows=174238687 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] @@ -212,7 +212,7 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_16] (rows=696954748 width=88) Output:["_col0","_col1","_col2"],keys:_col7, _col6, _col3 - Merge Join Operator [MERGEJOIN_131] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_128] (rows=696954748 width=88) Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col3","_col6","_col7"] <-Map 17 [SIMPLE_EDGE] SHUFFLE [RS_13] @@ -221,7 +221,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_130] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_127] (rows=633595212 width=88) Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col3"] <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_10] @@ -232,7 +232,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_2] (rows=575995635 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_120] (rows=575995635 width=88) + Filter Operator [FIL_117] (rows=575995635 width=88) predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] diff --git ql/src/test/results/clientpositive/perf/tez/query89.q.out ql/src/test/results/clientpositive/perf/tez/query89.q.out index 1beb703..8d3f6c8 100644 --- ql/src/test/results/clientpositive/perf/tez/query89.q.out +++ ql/src/test/results/clientpositive/perf/tez/query89.q.out @@ -81,67 +81,65 @@ Stage-0 Select Operator [SEL_29] (rows=383325119 width=88) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] PTF Operator [PTF_28] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST","partition by:":"_col0, _col2, _col3, _col4"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col2, _col0, _col4, _col5"}] Select Operator [SEL_27] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_26] - PartitionCols:_col0, _col2, _col3, _col4 - Select Operator [SEL_25] (rows=383325119 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_24] (rows=383325119 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_22] (rows=766650239 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col7, _col10, _col12, _col13 - Merge Join Operator [MERGEJOIN_54] (rows=766650239 width=88) - Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col10","_col12","_col13"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0 - Select 
Operator [SEL_11] (rows=1704 width=1910) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_51] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_53] (rows=696954748 width=88) - Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col7","_col10"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=36524 width=1119) - Output:["_col0","_col2"] - Filter Operator [FIL_50] (rows=36524 width=1119) - predicate:((d_year = 2000) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_52] (rows=633595212 width=88) - Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6","_col7"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Select Operator [SEL_2] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_48] (rows=575995635 width=88) - predicate:(ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=231000 width=1436) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_49] (rows=231000 width=1436) - predicate:((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and ((i_category) IN ('Home', 'Books', 'Electronics') or (i_category) IN ('Shoes', 'Jewelry', 'Men')) and ((i_class) IN ('wallpaper', 'parenting', 'musical') or (i_class) IN ('womens', 'birdal', 'pants')) and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category"] + PartitionCols:_col2, _col0, _col4, _col5 + Group By Operator [GBY_24] (rows=383325119 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_22] (rows=766650239 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col7, _col10, _col12, _col13 + Merge Join Operator [MERGEJOIN_54] (rows=766650239 width=88) + Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col10","_col12","_col13"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=1704 width=1910) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_51] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_9] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_company_name"] + <-Reducer 3 
[SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_53] (rows=696954748 width=88) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col7","_col10"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=36524 width=1119) + Output:["_col0","_col2"] + Filter Operator [FIL_50] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_52] (rows=633595212 width=88) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Select Operator [SEL_2] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_48] (rows=575995635 width=88) + predicate:(ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=231000 width=1436) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_49] (rows=231000 width=1436) + predicate:((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and ((i_category) IN ('Home', 'Books', 'Electronics') or (i_category) IN ('Shoes', 'Jewelry', 'Men')) and ((i_class) IN ('wallpaper', 'parenting', 'musical') or (i_class) IN ('womens', 'birdal', 'pants')) and i_item_sk is not null) + TableScan [TS_3] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category"] diff --git ql/src/test/results/clientpositive/perf/tez/query98.q.out ql/src/test/results/clientpositive/perf/tez/query98.q.out index 19bb49c..e403384 100644 --- ql/src/test/results/clientpositive/perf/tez/query98.q.out +++ ql/src/test/results/clientpositive/perf/tez/query98.q.out @@ -74,61 +74,59 @@ Stage-0 limit:-1 Stage-1 Reducer 6 - File Output Operator [FS_28] - Select Operator [SEL_26] (rows=348477374 width=88) + File Output Operator [FS_27] + Select Operator [SEL_25] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_25] - Select Operator [SEL_23] (rows=348477374 width=88) + SHUFFLE [RS_24] + Select Operator [SEL_22] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_22] (rows=348477374 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST","partition by:":"_col3"}] - Select Operator [SEL_21] (rows=348477374 width=88) + PTF Operator [PTF_21] (rows=348477374 width=88) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] + Select Operator [SEL_20] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_20] - PartitionCols:_col3 - Select Operator [SEL_19] (rows=348477374 width=88) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_18] (rows=348477374 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_16] (rows=696954748 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col10, _col9, _col6, _col7, _col8 - Merge Join Operator [MERGEJOIN_38] (rows=696954748 width=88) - Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=231000 width=1436) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_36] (rows=231000 width=1436) - predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_37] (rows=633595212 width=88) - Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_9] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_34] (rows=575995635 width=88) - predicate:(ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_35] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00.0' AND TIMESTAMP'2001-02-11 00:00:00.0' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + SHUFFLE [RS_19] + PartitionCols:_col1 + Group By Operator [GBY_18] (rows=348477374 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_16] (rows=696954748 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col10, _col9, _col6, _col7, _col8 + Merge Join Operator [MERGEJOIN_37] (rows=696954748 width=88) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=231000 width=1436) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_35] (rows=231000 width=1436) + predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) + TableScan [TS_6] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_36] 
(rows=633595212 width=88) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_33] (rows=575995635 width=88) + predicate:(ss_item_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_34] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00.0' AND TIMESTAMP'2001-02-11 00:00:00.0' and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] diff --git ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out index 9102bd4..7584d84 100644 --- ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out +++ ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out @@ -723,9 +723,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-4 depends on stages: Stage-3, Stage-8 Stage-6 is a root stage - Stage-7 depends on stages: Stage-6, Stage-9 + Stage-7 depends on stages: Stage-6 Stage-8 depends on stages: Stage-7 - Stage-9 is a root stage Stage-0 depends on stages: Stage-4 STAGE PLANS: @@ -846,26 +845,26 @@ STAGE PLANS: Map Operator Tree: TableScan Union - Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: -- - Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE TableScan Union - Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string) sort order: -- - Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -917,25 +916,39 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: 
string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col0 (type: string) - 1 _col1 (type: string), _col0 (type: string) + 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col1 (type: string), _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -951,53 +964,17 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out index 8e4828c..4a36dd4 100644 --- ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out +++ ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -276,7 +276,7 @@ Stage-0 limit:-1 Stage-1 Reducer 2 - File Output Operator [FS_7] + File Output Operator [FS_6] Select Operator [SEL_5] (rows=10 width=97) Output:["_col0","_col1","_col2"] Group By Operator [GBY_4] (rows=10 width=101) @@ -306,15 +306,15 @@ Stage-0 limit:-1 Stage-1 Reducer 3 - File Output Operator [FS_12] - Select Operator [SEL_11] (rows=5 width=20) + File Output Operator [FS_11] + Select Operator [SEL_10] (rows=5 width=20) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_10] (rows=5 width=20) + Group By Operator [GBY_9] (rows=5 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [GROUP] - GROUP [RS_9] + GROUP [RS_8] PartitionCols:_col0, _col1 - Group By Operator [GBY_8] (rows=5 width=20) + Group By Operator [GBY_7] (rows=5 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 Select Operator [SEL_5] (rows=10 width=101) Output:["_col0","_col1"] @@ -348,37 +348,37 @@ Stage-0 limit:-1 Stage-1 Reducer 4 - File Output Operator [FS_33] - Select Operator [SEL_31] (rows=1 width=20) + File Output Operator [FS_31] + Select Operator [SEL_29] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SORT] - SORT [RS_30] - Select Operator [SEL_29] (rows=1 width=28) + SORT [RS_28] + Select Operator [SEL_27] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_28] (rows=1 width=20) + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [GROUP] - GROUP [RS_27] + GROUP [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_26] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col1 - Select Operator [SEL_25] (rows=1 width=20) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col4"] - Filter Operator [FIL_23] (rows=1 width=20) + Filter Operator [FIL_21] (rows=1 width=20) predicate:(((_col3 + _col6) >= 0) and ((_col3 > 0) or (_col1 >= 0))) - Join Operator [JOIN_22] (rows=3 width=18) + Join Operator [JOIN_20] (rows=3 width=18) Output:["_col1","_col3","_col4","_col6"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} <-Map 1 
[PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_19] + PARTITION-LEVEL SORT [RS_17] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_34] (rows=18 width=84) + Filter Operator [FIL_32] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 6 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_20] + PARTITION-LEVEL SORT [RS_18] PartitionCols:_col0 Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] @@ -389,25 +389,25 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_35] (rows=4 width=93) + Filter Operator [FIL_33] (rows=4 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 8 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_21] + PARTITION-LEVEL SORT [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=1 width=89) + Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=1 width=93) + Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 7 [GROUP] - GROUP [RS_15] + GROUP [RS_14] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=93) + Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_36] (rows=4 width=93) + Filter Operator [FIL_34] (rows=4 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) - TableScan [TS_11] (rows=20 width=88) + TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc) cbo_t1 left outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p left outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int % c asc, cbo_t3.c_int desc @@ -428,37 +428,37 @@ Stage-0 limit:-1 Stage-1 Reducer 4 - File Output Operator [FS_34] - Select Operator [SEL_32] (rows=1 width=20) + File Output Operator [FS_32] + Select Operator [SEL_30] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SORT] - SORT [RS_31] - Select Operator [SEL_30] (rows=1 width=28) + SORT [RS_29] + Select Operator [SEL_28] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_29] (rows=1 width=20) + Group By Operator [GBY_27] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 
<-Reducer 2 [GROUP] - GROUP [RS_28] + GROUP [RS_26] PartitionCols:_col0, _col1 - Group By Operator [GBY_27] (rows=1 width=20) + Group By Operator [GBY_25] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_26] (rows=1 width=20) + Select Operator [SEL_24] (rows=1 width=20) Output:["_col1","_col4"] - Filter Operator [FIL_23] (rows=1 width=20) + Filter Operator [FIL_21] (rows=1 width=20) predicate:(((UDFToLong(_col1) + _col4) >= 0) and ((_col1 >= 1) or (_col4 >= 1L)) and ((_col3 + _col6) >= 0)) - Join Operator [JOIN_22] (rows=3 width=18) + Join Operator [JOIN_20] (rows=3 width=18) Output:["_col1","_col3","_col4","_col6"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} <-Map 1 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_19] + PARTITION-LEVEL SORT [RS_17] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_35] (rows=18 width=84) + Filter Operator [FIL_33] (rows=18 width=84) predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 6 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_20] + PARTITION-LEVEL SORT [RS_18] PartitionCols:_col0 Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] @@ -469,25 +469,25 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_36] (rows=2 width=93) + Filter Operator [FIL_34] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 8 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_21] + PARTITION-LEVEL SORT [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=1 width=89) + Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=1 width=93) + Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 7 [GROUP] - GROUP [RS_15] + GROUP [RS_14] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=93) + Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_37] (rows=2 width=93) + Filter Operator [FIL_35] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) - TableScan [TS_11] (rows=20 width=88) + TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b+c, a desc) cbo_t1 right outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where 
(cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 2) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c @@ -507,31 +507,31 @@ Stage-0 limit:-1 Stage-1 Reducer 3 - File Output Operator [FS_30] - Group By Operator [GBY_28] (rows=1 width=20) + File Output Operator [FS_28] + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [GROUP] - GROUP [RS_27] + GROUP [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_26] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_25] (rows=1 width=20) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col4"] - Filter Operator [FIL_23] (rows=1 width=20) + Filter Operator [FIL_21] (rows=1 width=20) predicate:(((_col3 + _col6) >= 2) and ((_col3 > 0) or (_col1 >= 0))) - Join Operator [JOIN_22] (rows=3 width=18) + Join Operator [JOIN_20] (rows=3 width=18) Output:["_col1","_col3","_col4","_col6"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} <-Map 1 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_19] + PARTITION-LEVEL SORT [RS_17] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=18 width=84) + Filter Operator [FIL_29] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 5 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_20] + PARTITION-LEVEL SORT [RS_18] PartitionCols:_col0 Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] @@ -542,25 +542,25 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_32] (rows=2 width=93) + Filter Operator [FIL_30] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_21] + PARTITION-LEVEL SORT [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=1 width=89) + Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=1 width=93) + Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 6 [GROUP] - GROUP [RS_15] + GROUP [RS_14] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=93) + Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_33] (rows=2 width=93) + Filter Operator [FIL_31] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) - 
TableScan [TS_11] (rows=20 width=88) + TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by c+a desc) cbo_t1 full outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by p+q desc, r asc) cbo_t2 on cbo_t1.a=p full outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int @@ -581,35 +581,35 @@ Stage-0 limit:-1 Stage-1 Reducer 4 - File Output Operator [FS_33] - Select Operator [SEL_32] (rows=1 width=20) + File Output Operator [FS_31] + Select Operator [SEL_30] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SORT] - SORT [RS_31] - Group By Operator [GBY_29] (rows=1 width=20) + SORT [RS_29] + Group By Operator [GBY_27] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [GROUP] - GROUP [RS_28] + GROUP [RS_26] PartitionCols:_col0, _col1 - Group By Operator [GBY_27] (rows=1 width=20) + Group By Operator [GBY_25] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_26] (rows=1 width=20) + Select Operator [SEL_24] (rows=1 width=20) Output:["_col1","_col4"] - Filter Operator [FIL_23] (rows=1 width=20) + Filter Operator [FIL_21] (rows=1 width=20) predicate:(((UDFToLong(_col1) + _col4) >= 0) and ((_col1 >= 1) or (_col4 >= 1L)) and ((_col3 + _col6) >= 0)) - Join Operator [JOIN_22] (rows=3 width=18) + Join Operator [JOIN_20] (rows=3 width=18) Output:["_col1","_col3","_col4","_col6"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} <-Map 1 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_19] + PARTITION-LEVEL SORT [RS_17] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_34] (rows=18 width=84) + Filter Operator [FIL_32] (rows=18 width=84) predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 6 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_20] + PARTITION-LEVEL SORT [RS_18] PartitionCols:_col0 Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] @@ -620,25 +620,25 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_35] (rows=2 width=93) + Filter Operator [FIL_33] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) 
default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 8 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_21] + PARTITION-LEVEL SORT [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=1 width=89) + Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=1 width=93) + Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 7 [GROUP] - GROUP [RS_15] + GROUP [RS_14] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=93) + Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_36] (rows=2 width=93) + Filter Operator [FIL_34] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) - TableScan [TS_11] (rows=20 width=88) + TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c @@ -658,31 +658,31 @@ Stage-0 limit:-1 Stage-1 Reducer 3 - File Output Operator [FS_30] - Group By Operator [GBY_28] (rows=1 width=20) + File Output Operator [FS_28] + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [GROUP] - GROUP [RS_27] + GROUP [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_26] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_25] (rows=1 width=20) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col4"] - Filter Operator [FIL_23] (rows=1 width=20) + Filter Operator [FIL_21] (rows=1 width=20) predicate:(((_col3 + _col6) >= 0) and ((_col3 > 0) or (_col1 >= 0))) - Join Operator [JOIN_22] (rows=3 width=18) + Join Operator [JOIN_20] (rows=3 width=18) Output:["_col1","_col3","_col4","_col6"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} <-Map 1 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_19] + PARTITION-LEVEL SORT [RS_17] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=18 width=84) + Filter Operator [FIL_29] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 5 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_20] + PARTITION-LEVEL SORT [RS_18] PartitionCols:_col0 Select Operator [SEL_9] (rows=1 width=97) 
Output:["_col0","_col1","_col2"] @@ -693,25 +693,25 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_32] (rows=2 width=93) + Filter Operator [FIL_30] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_21] + PARTITION-LEVEL SORT [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=1 width=89) + Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=1 width=93) + Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 6 [GROUP] - GROUP [RS_15] + GROUP [RS_14] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=93) + Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_33] (rows=2 width=93) + Filter Operator [FIL_31] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (c_float > 0) and key is not null) - TableScan [TS_11] (rows=20 width=88) + TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc @@ -1191,13 +1191,13 @@ Stage-0 limit:1 Stage-1 Reducer 3 - File Output Operator [FS_10] - Limit [LIM_9] (rows=1 width=97) + File Output Operator [FS_9] + Limit [LIM_8] (rows=1 width=97) Number of rows:1 - Select Operator [SEL_8] (rows=10 width=97) + Select Operator [SEL_7] (rows=10 width=97) Output:["_col0","_col1","_col2"] <-Reducer 2 [SORT] - SORT [RS_7] + SORT [RS_6] Select Operator [SEL_5] (rows=10 width=97) Output:["_col0","_col1","_col2"] Group By Operator [GBY_4] (rows=10 width=101) @@ -1228,19 +1228,19 @@ Stage-0 limit:1 Stage-1 Reducer 4 - File Output Operator [FS_15] - Limit [LIM_14] (rows=1 width=20) + File Output Operator [FS_14] + Limit [LIM_13] (rows=1 width=20) Number of rows:1 - Select Operator [SEL_13] (rows=5 width=20) + Select Operator [SEL_12] (rows=5 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SORT] - SORT [RS_12] - Group By Operator [GBY_10] (rows=5 width=20) + SORT [RS_11] + Group By Operator [GBY_9] (rows=5 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [GROUP] - GROUP [RS_9] + GROUP [RS_8] PartitionCols:_col0, _col1 - Group By Operator [GBY_8] (rows=5 width=20) + Group By Operator [GBY_7] (rows=5 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 Select Operator [SEL_5] (rows=10 width=101) Output:["_col0","_col1"] @@ -1332,48 +1332,48 @@ Stage-0 limit:-1 Stage-1 Reducer 6 - File Output Operator [FS_46] - Limit [LIM_44] (rows=1 width=28) + File Output Operator [FS_44] + Limit [LIM_42] (rows=1 width=28) Number of rows:5 - Select Operator [SEL_43] (rows=1 width=28) + Select Operator [SEL_41] (rows=1 width=28) Output:["_col0","_col1","_col2"] <-Reducer 5 [SORT] - SORT [RS_42] - Select 
Operator [SEL_41] (rows=1 width=28) + SORT [RS_40] + Select Operator [SEL_39] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_40] (rows=1 width=20) + Group By Operator [GBY_38] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 4 [GROUP] - GROUP [RS_39] + GROUP [RS_37] PartitionCols:_col0, _col1 - Group By Operator [GBY_38] (rows=1 width=20) + Group By Operator [GBY_36] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col6 - Select Operator [SEL_37] (rows=2 width=20) + Select Operator [SEL_35] (rows=2 width=20) Output:["_col4","_col6"] - Filter Operator [FIL_35] (rows=2 width=20) + Filter Operator [FIL_33] (rows=2 width=20) predicate:(((_col3 + _col1) >= 0) and ((_col3 > 0) or (_col6 >= 0))) - Join Operator [JOIN_34] (rows=6 width=19) + Join Operator [JOIN_32] (rows=6 width=19) Output:["_col1","_col3","_col4","_col6"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} <-Map 10 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_33] + PARTITION-LEVEL SORT [RS_31] PartitionCols:_col0 - Select Operator [SEL_30] (rows=18 width=84) + Select Operator [SEL_28] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_49] (rows=18 width=84) + Filter Operator [FIL_47] (rows=18 width=84) predicate:key is not null - TableScan [TS_28] (rows=20 width=84) + TableScan [TS_26] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] <-Reducer 3 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_31] + PARTITION-LEVEL SORT [RS_29] PartitionCols:_col0 - Filter Operator [FIL_12] (rows=2 width=105) + Filter Operator [FIL_11] (rows=2 width=105) predicate:_col0 is not null - Limit [LIM_10] (rows=3 width=105) + Limit [LIM_9] (rows=3 width=105) Number of rows:5 - Select Operator [SEL_9] (rows=3 width=105) + Select Operator [SEL_8] (rows=3 width=105) Output:["_col0","_col1"] <-Reducer 2 [SORT] - SORT [RS_8] + SORT [RS_7] Select Operator [SEL_6] (rows=3 width=105) Output:["_col0","_col1","_col2","_col3"] Group By Operator [GBY_5] (rows=3 width=101) @@ -1383,33 +1383,33 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_3] (rows=3 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_47] (rows=6 width=93) + Filter Operator [FIL_45] (rows=6 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0))) TableScan [TS_0] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 9 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_32] + PARTITION-LEVEL SORT [RS_30] PartitionCols:_col0 - Filter Operator [FIL_26] (rows=2 width=97) + Filter Operator [FIL_24] (rows=2 width=97) predicate:_col0 is not null - Limit [LIM_24] (rows=3 width=97) + Limit [LIM_22] (rows=3 width=97) Number of rows:5 - Select Operator [SEL_23] (rows=3 width=97) + Select Operator [SEL_21] (rows=3 width=97) Output:["_col0","_col1","_col2"] <-Reducer 8 [SORT] - SORT [RS_22] - Select Operator [SEL_20] (rows=3 width=97) + SORT [RS_20] + Select Operator [SEL_19] (rows=3 width=97) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_19] (rows=3 width=101) + Group By Operator [GBY_18] (rows=3 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, 
KEY._col2 <-Map 7 [GROUP] - GROUP [RS_18] + GROUP [RS_17] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=3 width=101) + Group By Operator [GBY_16] (rows=3 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_48] (rows=6 width=93) + Filter Operator [FIL_46] (rows=6 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0))) - TableScan [TS_14] (rows=20 width=88) + TableScan [TS_13] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t1.c_int from cbo_t1 left semi join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) @@ -1521,35 +1521,35 @@ Stage-0 limit:-1 Stage-1 Reducer 5 - File Output Operator [FS_35] - Select Operator [SEL_34] (rows=1 width=101) + File Output Operator [FS_34] + Select Operator [SEL_33] (rows=1 width=101) Output:["_col0","_col1","_col2"] <-Reducer 4 [SORT] - SORT [RS_33] - Select Operator [SEL_32] (rows=1 width=101) + SORT [RS_32] + Select Operator [SEL_31] (rows=1 width=101) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_31] (rows=1 width=101) + Group By Operator [GBY_30] (rows=1 width=101) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [GROUP] - GROUP [RS_30] + GROUP [RS_29] PartitionCols:_col0, _col1 - Group By Operator [GBY_29] (rows=1 width=101) + Group By Operator [GBY_28] (rows=1 width=101) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 - Join Operator [JOIN_27] (rows=1 width=93) + Join Operator [JOIN_26] (rows=1 width=93) Output:["_col0","_col1"],condition map:[{"":"{\"type\":\"Left Semi\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Left Semi\",\"left\":0,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} <-Map 8 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_26] + PARTITION-LEVEL SORT [RS_25] PartitionCols:_col0 - Group By Operator [GBY_23] (rows=3 width=85) + Group By Operator [GBY_22] (rows=3 width=85) Output:["_col0"],keys:_col0 - Select Operator [SEL_19] (rows=6 width=85) + Select Operator [SEL_18] (rows=6 width=85) Output:["_col0"] - Filter Operator [FIL_38] (rows=6 width=85) + Filter Operator [FIL_37] (rows=6 width=85) predicate:(UDFToDouble(key) > 0.0D) - TableScan [TS_17] (rows=20 width=80) + TableScan [TS_16] (rows=20 width=80) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Reducer 2 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_24] + PARTITION-LEVEL SORT [RS_23] PartitionCols:_col0 Select Operator [SEL_8] (rows=1 width=93) Output:["_col0","_col1"] @@ -1564,14 +1564,14 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_3] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_36] (rows=1 width=93) + Filter Operator [FIL_35] (rows=1 width=93) predicate:((((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0D)) and ((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (UDFToDouble(key) > 0.0D) and (c_float > 0)) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 7 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_25] + PARTITION-LEVEL SORT [RS_24] PartitionCols:_col0 - Group By Operator [GBY_21] 
(rows=1 width=85) + Group By Operator [GBY_20] (rows=1 width=85) Output:["_col0"],keys:_col0 Select Operator [SEL_15] (rows=1 width=85) Output:["_col0"] @@ -1582,7 +1582,7 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_12] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_37] (rows=1 width=93) + Filter Operator [FIL_36] (rows=1 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and ((c_int >= 1) or (c_float >= 1)) and (UDFToDouble(key) > 0.0D) and (c_float > 0)) TableScan [TS_9] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -2127,11 +2127,11 @@ Stage-0 limit:-1 Stage-1 Reducer 3 - File Output Operator [FS_22] - Join Operator [JOIN_20] (rows=6 width=227) + File Output Operator [FS_21] + Join Operator [JOIN_19] (rows=6 width=227) Output:["_col0","_col1","_col2"],condition map:[{"":"{\"type\":\"Left Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col1","1":"_col0"} <-Reducer 2 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_18] + PARTITION-LEVEL SORT [RS_17] PartitionCols:_col1 Select Operator [SEL_6] (rows=13 width=227) Output:["_col0","_col1","_col2"] @@ -2142,27 +2142,27 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_3] (rows=13 width=235) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(p_size)","count(p_size)"],keys:p_name, p_mfgr - Filter Operator [FIL_23] (rows=26 width=223) + Filter Operator [FIL_22] (rows=26 width=223) predicate:p_name is not null TableScan [TS_0] (rows=26 width=223) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] <-Reducer 5 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_19] + PARTITION-LEVEL SORT [RS_18] PartitionCols:_col0 - Group By Operator [GBY_17] (rows=13 width=184) + Group By Operator [GBY_16] (rows=13 width=184) Output:["_col0"],keys:_col0 - Select Operator [SEL_12] (rows=26 width=184) + Select Operator [SEL_11] (rows=26 width=184) Output:["_col0"] - Filter Operator [FIL_24] (rows=26 width=491) + Filter Operator [FIL_23] (rows=26 width=491) predicate:first_value_window_0 is not null - PTF Operator [PTF_11] (rows=26 width=491) + PTF Operator [PTF_10] (rows=26 width=491) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] - Select Operator [SEL_10] (rows=26 width=491) + Select Operator [SEL_9] (rows=26 width=491) Output:["_col1","_col2","_col5"] <-Map 4 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_9] + PARTITION-LEVEL SORT [RS_8] PartitionCols:p_mfgr - TableScan [TS_8] (rows=26 width=223) + TableScan [TS_7] (rows=26 width=223) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"] PREHOOK: query: explain select * diff --git ql/src/test/results/clientpositive/spark/subquery_notin.q.out ql/src/test/results/clientpositive/spark/subquery_notin.q.out index 8d14106..51b2b44 100644 --- ql/src/test/results/clientpositive/spark/subquery_notin.q.out +++ ql/src/test/results/clientpositive/spark/subquery_notin.q.out @@ -5570,11 +5570,11 @@ STAGE PLANS: Reducer 10 <- Map 9 (PARTITION-LEVEL SORT, 2), Reducer 13 (PARTITION-LEVEL SORT, 2) Reducer 11 <- Reducer 10 (GROUP, 2) Reducer 13 <- Map 12 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 2), Reducer 2 
(PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 12 (GROUP, 2) - Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) + Reducer 8 <- Map 12 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -5611,7 +5611,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: char(100)) Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 4 Map Operator Tree: TableScan alias: t2 @@ -5736,19 +5736,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: char(100)) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: char(100)) - Reducer 6 - Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 @@ -5758,8 +5745,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(), count(_col1) - keys: _col0 (type: char(100)) + aggregations: count(), count(_col0) + keys: _col1 (type: char(100)) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE @@ -5769,7 +5756,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: char(100)) Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 7 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1) @@ -5783,6 +5770,19 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: char(100)) Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(100)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: char(100)) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/subquery_select.q.out ql/src/test/results/clientpositive/spark/subquery_select.q.out index 7d23b78..edbd4d2 100644 --- ql/src/test/results/clientpositive/spark/subquery_select.q.out +++ ql/src/test/results/clientpositive/spark/subquery_select.q.out @@ -4888,7 +4888,7 @@ POSTHOOK: Input: default@part 2 28 46 28 23 28 -Warning: Shuffle Join JOIN[69][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[68][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain select t1.p_size, (select count(*) from part t2 where t2.p_partkey = 
t1.p_partkey group by t2.p_partkey), (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type
@@ -5230,7 +5230,7 @@ STAGE PLANS:
      Processor Tree:
        ListSink

-Warning: Shuffle Join JOIN[69][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Work 'Reducer 3' is a cross product
+Warning: Shuffle Join JOIN[68][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Work 'Reducer 3' is a cross product
PREHOOK: query: select t1.p_size, (select count(*) from part t2 where t2.p_partkey = t1.p_partkey group by t2.p_partkey), (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type
diff --git standalone-metastore/pom.xml standalone-metastore/pom.xml
index 20f2e5b..6dad64c 100644
--- standalone-metastore/pom.xml
+++ standalone-metastore/pom.xml
@@ -583,7 +583,7 @@
                run
-                2.5.0
+                com.google.protobuf:protoc:2.5.0
                none
                ${basedir}/src/main/protobuf/org/apache/hadoop/hive/metastore
diff --git standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
index b254f69..8e357f6 100644
--- standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
+++ standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
@@ -23286,6 +23286,7 @@ WMNullableResourcePlan::~WMNullableResourcePlan() throw() {

void WMNullableResourcePlan::__set_name(const std::string& val) {
  this->name = val;
+__isset.name = true;
}

void WMNullableResourcePlan::__set_status(const WMResourcePlanStatus::type val) {
@@ -23325,7 +23326,6 @@ uint32_t WMNullableResourcePlan::read(::apache::thrift::protocol::TProtocol* ipr

  using ::apache::thrift::protocol::TProtocolException;

-  bool isset_name = false;

  while (true)
  {
@@ -23338,7 +23338,7 @@ uint32_t WMNullableResourcePlan::read(::apache::thrift::protocol::TProtocol* ipr
      case 1:
        if (ftype == ::apache::thrift::protocol::T_STRING) {
          xfer += iprot->readString(this->name);
-          isset_name = true;
+          this->__isset.name = true;
        } else {
          xfer += iprot->skip(ftype);
        }
@@ -23394,8 +23394,6 @@ uint32_t WMNullableResourcePlan::read(::apache::thrift::protocol::TProtocol* ipr

  xfer += iprot->readStructEnd();

-  if (!isset_name)
-    throw TProtocolException(TProtocolException::INVALID_DATA);
  return xfer;
}

@@ -23404,10 +23402,11 @@ uint32_t WMNullableResourcePlan::write(::apache::thrift::protocol::TProtocol* op
  apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
  xfer += oprot->writeStructBegin("WMNullableResourcePlan");

-  xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 1);
-  xfer += oprot->writeString(this->name);
-  xfer += oprot->writeFieldEnd();
-
+  if (this->__isset.name) {
+    xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 1);
+    xfer += oprot->writeString(this->name);
+    xfer += oprot->writeFieldEnd();
+  }
  if (this->__isset.status) {
    xfer += oprot->writeFieldBegin("status", ::apache::thrift::protocol::T_I32, 2);
    xfer += oprot->writeI32((int32_t)this->status);
@@ -23471,7 +23470,7 @@ WMNullableResourcePlan& WMNullableResourcePlan::operator=(const WMNullableResour
void WMNullableResourcePlan::printTo(std::ostream& out) const {
  using ::apache::thrift::to_string;
  out << "WMNullableResourcePlan(";
-  out << "name=" << to_string(name);
+  out << "name="; (__isset.name ? (out << to_string(name)) : (out << ""));
  out << ", " << "status="; (__isset.status ? (out << to_string(status)) : (out << ""));
  out << ", " << "queryParallelism="; (__isset.queryParallelism ? (out << to_string(queryParallelism)) : (out << ""));
  out << ", " << "isSetQueryParallelism="; (__isset.isSetQueryParallelism ? (out << to_string(isSetQueryParallelism)) : (out << ""));
diff --git standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
index b094831..724f022 100644
--- standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
+++ standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
@@ -9583,7 +9583,8 @@ inline std::ostream& operator<<(std::ostream& out, const WMResourcePlan& obj)
}

typedef struct _WMNullableResourcePlan__isset {
-  _WMNullableResourcePlan__isset() : status(false), queryParallelism(false), isSetQueryParallelism(false), defaultPoolPath(false), isSetDefaultPoolPath(false) {}
+  _WMNullableResourcePlan__isset() : name(false), status(false), queryParallelism(false), isSetQueryParallelism(false), defaultPoolPath(false), isSetDefaultPoolPath(false) {}
+  bool name :1;
  bool status :1;
  bool queryParallelism :1;
  bool isSetQueryParallelism :1;
@@ -9623,7 +9624,9 @@ class WMNullableResourcePlan {

  bool operator == (const WMNullableResourcePlan & rhs) const
  {
-    if (!(name == rhs.name))
+    if (__isset.name != rhs.__isset.name)
+      return false;
+    else if (__isset.name && !(name == rhs.name))
      return false;
    if (__isset.status != rhs.__isset.status)
      return false;
diff --git standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java
index 7b7cefc..4621e10 100644
--- standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java
+++ standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java
@@ -51,7 +51,7 @@
    schemes.put(TupleScheme.class, new WMNullableResourcePlanTupleSchemeFactory());
  }

-  private String name; // required
+  private String name; // optional
  private WMResourcePlanStatus status; // optional
  private int queryParallelism; // optional
  private boolean isSetQueryParallelism; // optional
@@ -140,11 +140,11 @@ public String getFieldName() {
  private static final int __ISSETQUERYPARALLELISM_ISSET_ID = 1;
  private static final int __ISSETDEFAULTPOOLPATH_ISSET_ID = 2;
  private byte __isset_bitfield = 0;
-  private static final _Fields optionals[] = {_Fields.STATUS,_Fields.QUERY_PARALLELISM,_Fields.IS_SET_QUERY_PARALLELISM,_Fields.DEFAULT_POOL_PATH,_Fields.IS_SET_DEFAULT_POOL_PATH};
+  private static final _Fields optionals[] = {_Fields.NAME,_Fields.STATUS,_Fields.QUERY_PARALLELISM,_Fields.IS_SET_QUERY_PARALLELISM,_Fields.DEFAULT_POOL_PATH,_Fields.IS_SET_DEFAULT_POOL_PATH};
  public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
  static {
    Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
-    tmpMap.put(_Fields.NAME, new org.apache.thrift.meta_data.FieldMetaData("name", org.apache.thrift.TFieldRequirementType.REQUIRED,
+    tmpMap.put(_Fields.NAME, new org.apache.thrift.meta_data.FieldMetaData("name", org.apache.thrift.TFieldRequirementType.OPTIONAL,
        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
    tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.OPTIONAL,
        new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, WMResourcePlanStatus.class)));
@@ -163,13 +163,6 @@ public String getFieldName() {
  public WMNullableResourcePlan() {
  }

-  public WMNullableResourcePlan(
-    String name)
-  {
-    this();
-    this.name = name;
-  }
-
  /**
   * Performs a deep copy on other.
   */
@@ -644,13 +637,15 @@ public String toString() {
    StringBuilder sb = new StringBuilder("WMNullableResourcePlan(");
    boolean first = true;

-    sb.append("name:");
-    if (this.name == null) {
-      sb.append("null");
-    } else {
-      sb.append(this.name);
+    if (isSetName()) {
+      sb.append("name:");
+      if (this.name == null) {
+        sb.append("null");
+      } else {
+        sb.append(this.name);
+      }
+      first = false;
    }
-    first = false;
    if (isSetStatus()) {
      if (!first) sb.append(", ");
      sb.append("status:");
@@ -695,10 +690,6 @@
  public void validate() throws org.apache.thrift.TException {
    // check for required fields
-    if (!isSetName()) {
-      throw new org.apache.thrift.protocol.TProtocolException("Required field 'name' is unset! Struct:" + toString());
-    }
-
    // check for sub-struct validity
  }

@@ -800,9 +791,11 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, WMNullableResource
      oprot.writeStructBegin(STRUCT_DESC);
      if (struct.name != null) {
-        oprot.writeFieldBegin(NAME_FIELD_DESC);
-        oprot.writeString(struct.name);
-        oprot.writeFieldEnd();
+        if (struct.isSetName()) {
+          oprot.writeFieldBegin(NAME_FIELD_DESC);
+          oprot.writeString(struct.name);
+          oprot.writeFieldEnd();
+        }
      }
      if (struct.status != null) {
        if (struct.isSetStatus()) {
@@ -850,24 +843,29 @@ public WMNullableResourcePlanTupleScheme getScheme() {
    @Override
    public void write(org.apache.thrift.protocol.TProtocol prot, WMNullableResourcePlan struct) throws org.apache.thrift.TException {
      TTupleProtocol oprot = (TTupleProtocol) prot;
-      oprot.writeString(struct.name);
      BitSet optionals = new BitSet();
-      if (struct.isSetStatus()) {
+      if (struct.isSetName()) {
        optionals.set(0);
      }
-      if (struct.isSetQueryParallelism()) {
+      if (struct.isSetStatus()) {
        optionals.set(1);
      }
-      if (struct.isSetIsSetQueryParallelism()) {
+      if (struct.isSetQueryParallelism()) {
        optionals.set(2);
      }
-      if (struct.isSetDefaultPoolPath()) {
+      if (struct.isSetIsSetQueryParallelism()) {
        optionals.set(3);
      }
-      if (struct.isSetIsSetDefaultPoolPath()) {
+      if (struct.isSetDefaultPoolPath()) {
        optionals.set(4);
      }
-      oprot.writeBitSet(optionals, 5);
+      if (struct.isSetIsSetDefaultPoolPath()) {
+        optionals.set(5);
+      }
+      oprot.writeBitSet(optionals, 6);
+      if (struct.isSetName()) {
+        oprot.writeString(struct.name);
+      }
      if (struct.isSetStatus()) {
        oprot.writeI32(struct.status.getValue());
      }
@@ -888,26 +886,28 @@ public void write(org.apache.thrift.protocol.TProtocol prot, WMNullableResourceP
    @Override
    public void read(org.apache.thrift.protocol.TProtocol prot, WMNullableResourcePlan struct) throws org.apache.thrift.TException {
      TTupleProtocol iprot = (TTupleProtocol) prot;
-      struct.name = iprot.readString();
-      struct.setNameIsSet(true);
-      BitSet incoming = iprot.readBitSet(5);
+      BitSet incoming = iprot.readBitSet(6);
      if (incoming.get(0)) {
+        struct.name = iprot.readString();
+        struct.setNameIsSet(true);
+      }
+      if (incoming.get(1)) {
        struct.status = org.apache.hadoop.hive.metastore.api.WMResourcePlanStatus.findByValue(iprot.readI32());
        struct.setStatusIsSet(true);
      }
-      if (incoming.get(1)) {
+      if (incoming.get(2)) {
        struct.queryParallelism = iprot.readI32();
        struct.setQueryParallelismIsSet(true);
      }
-      if (incoming.get(2)) {
+      if (incoming.get(3)) {
        struct.isSetQueryParallelism = iprot.readBool();
        struct.setIsSetQueryParallelismIsSet(true);
      }
-      if (incoming.get(3)) {
+      if (incoming.get(4)) {
        struct.defaultPoolPath = iprot.readString();
        struct.setDefaultPoolPathIsSet(true);
      }
-      if (incoming.get(4)) {
+      if (incoming.get(5)) {
        struct.isSetDefaultPoolPath = iprot.readBool();
        struct.setIsSetDefaultPoolPathIsSet(true);
      }
    }
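For illustration only (not part of the patch): once name is optional in the regenerated bean above, an unnamed WMNullableResourcePlan passes validate() and simply skips the field during serialization. A minimal sketch, assuming the usual Thrift-generated setName/isSetName accessors:

import org.apache.hadoop.hive.metastore.api.WMNullableResourcePlan;

public class WMNullableResourcePlanSketch {
  public static void main(String[] args) throws Exception {
    WMNullableResourcePlan plan = new WMNullableResourcePlan();
    // Before this change, validate() threw "Required field 'name' is unset!".
    plan.validate();
    System.out.println(plan.isSetName());   // false: the name field is not serialized
    plan.setName("sample_plan");            // hypothetical plan name
    System.out.println(plan.isSetName());   // true once explicitly set
  }
}

This mirrors the tuple-scheme change above, where name moves from an unconditional writeString into the optionals BitSet.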
diff --git standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
index 82539ed..fd157b2 100644
--- standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
+++ standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
@@ -16326,8 +16326,6 @@ def write(self, oprot):
    oprot.writeStructEnd()

  def validate(self):
-    if self.name is None:
-      raise TProtocol.TProtocolException(message='Required field name is unset!')
    return
diff --git standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
index 16c814e..aa93158 100644
--- standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
+++ standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
@@ -3688,7 +3688,7 @@ class WMNullableResourcePlan
  ISSETDEFAULTPOOLPATH = 7

  FIELDS = {
-    NAME => {:type => ::Thrift::Types::STRING, :name => 'name'},
+    NAME => {:type => ::Thrift::Types::STRING, :name => 'name', :optional => true},
    STATUS => {:type => ::Thrift::Types::I32, :name => 'status', :optional => true, :enum_class => ::WMResourcePlanStatus},
    QUERYPARALLELISM => {:type => ::Thrift::Types::I32, :name => 'queryParallelism', :optional => true},
    ISSETQUERYPARALLELISM => {:type => ::Thrift::Types::BOOL, :name => 'isSetQueryParallelism', :optional => true},
@@ -3699,7 +3699,6 @@ class WMNullableResourcePlan
  def struct_fields; FIELDS; end

  def validate
-    raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field name is unset!') unless @name
    unless @status.nil? || ::WMResourcePlanStatus::VALID_VALUES.include?(@status)
      raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Invalid value of field status!')
    end
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
index 2534fa2..280075d 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
@@ -20,9 +20,14 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
/**
 * HiveMetaHook defines notification methods which are invoked as part
 * of transactions against the metastore, allowing external catalogs
@@ -36,6 +41,11 @@
@InterfaceAudience.Public
@InterfaceStability.Stable
public interface HiveMetaHook {
+
+  public String ALTER_TABLE_OPERATION_TYPE = "alterTableOpType";
+
+  public List allowedAlterTypes = ImmutableList.of("ADDPROPS", "DROPPROPS");
+
  /**
   * Called before a new table definition is added to the metastore
   * during CREATE TABLE.
@@ -92,4 +102,21 @@ public void rollbackDropTable(Table table)
   */
  public void commitDropTable(Table table, boolean deleteData)
    throws MetaException;
+
+  /**
+   * Called before a table is altered in the metastore
+   * during ALTER TABLE.
+   *
+   * @param table new table definition
+   */
+  public default void preAlterTable(Table table, EnvironmentContext context) throws MetaException {
+    String alterOpType = context.getProperties().get(ALTER_TABLE_OPERATION_TYPE);
+    // By default allow only ADDPROPS and DROPPROPS.
+    // alterOpType is null in case of stats update.
+    if (alterOpType != null && !allowedAlterTypes.contains(alterOpType)){
+      throw new MetaException(
+          "ALTER TABLE can not be used for " + alterOpType + " to a non-native table ");
+    }
+  }
+
}
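For illustration only: a hypothetical storage-handler hook built on the new preAlterTable default above. The class name and the extra RENAME allowance are invented; the no-op create/drop callbacks just satisfy the rest of the interface, and anything not handled falls through to the interface default (ADDPROPS/DROPPROPS only):

import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;

public class ExampleAlterAwareMetaHook implements HiveMetaHook {
  @Override public void preCreateTable(Table table) throws MetaException { }
  @Override public void rollbackCreateTable(Table table) throws MetaException { }
  @Override public void commitCreateTable(Table table) throws MetaException { }
  @Override public void preDropTable(Table table) throws MetaException { }
  @Override public void rollbackDropTable(Table table) throws MetaException { }
  @Override public void commitDropTable(Table table, boolean deleteData) throws MetaException { }

  @Override
  public void preAlterTable(Table table, EnvironmentContext context) throws MetaException {
    String opType = context.getProperties().get(ALTER_TABLE_OPERATION_TYPE);
    if ("RENAME".equals(opType)) {
      return; // this hypothetical handler also tolerates renames
    }
    // Everything else falls back to the interface default: ADDPROPS/DROPPROPS only.
    HiveMetaHook.super.preAlterTable(table, context);
  }
}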
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 519e8fe..8a5de09 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -2810,6 +2810,9 @@ public Partition append_partition(final String dbName, final String tableName,
    public Partition append_partition_with_environment_context(final String dbName,
        final String tableName, final List part_vals, final EnvironmentContext envContext)
        throws InvalidObjectException, AlreadyExistsException, MetaException {
+      if (part_vals == null) {
+        throw new MetaException("The partition values must not be null.");
+      }
      startPartitionFunction("append_partition", dbName, tableName, part_vals);
      if (LOG.isDebugEnabled()) {
        for (String part : part_vals) {
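For illustration only, a sketch of what the new guard above means for a caller: a null partition-value list now fails fast with the MetaException message shown, instead of surfacing as a TTransportException or NullPointerException, which is also what the TestAppendPartitions change at the end of this patch asserts. The helper below assumes an already-connected IMetaStoreClient and hypothetical database/table names passed in by the caller:

import java.util.List;

import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;

public class AppendPartitionNullValuesSketch {
  static void appendWithNullValues(IMetaStoreClient client, String dbName, String tableName)
      throws Exception {
    try {
      // Same call shape as the updated test: a null partition-value list.
      client.appendPartition(dbName, tableName, (List<String>) null);
    } catch (MetaException e) {
      // Expected with this patch: "The partition values must not be null."
      System.out.println("Rejected as expected: " + e.getMessage());
    }
  }
}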
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index ae42077..d4bdcf1 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -410,6 +410,10 @@ public void alter_table(String defaultDatabaseName, String tblName, Table table,
  @Override
  public void alter_table_with_environmentContext(String dbname, String tbl_name, Table new_tbl,
      EnvironmentContext envContext) throws InvalidOperationException, MetaException, TException {
+    HiveMetaHook hook = getHook(new_tbl);
+    if (hook != null) {
+      hook.preAlterTable(new_tbl, envContext);
+    }
    client.alter_table_with_environment_context(dbname, tbl_name, new_tbl, envContext);
  }
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 3e1fea9..de94172 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -1276,6 +1276,8 @@ public boolean dropTable(String dbName, String tableName) throws MetaException,
  private boolean dropCreationMetadata(String dbName, String tableName) throws MetaException,
      NoSuchObjectException, InvalidObjectException, InvalidInputException {
    boolean success = false;
+    dbName = normalizeIdentifier(dbName);
+    tableName = normalizeIdentifier(tableName);
    try {
      openTransaction();
      MCreationMetadata mcm = getCreationMetadata(dbName, tableName);
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
index e373753..b477ce5 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
@@ -534,7 +534,11 @@ public static boolean isExternalTable(Table table) {
      return false;
    }

-    return "TRUE".equalsIgnoreCase(params.get("EXTERNAL"));
+    return isExternal(params);
+  }
+
+  public static boolean isExternal(Map tableParams){
+    return "TRUE".equalsIgnoreCase(tableParams.get("EXTERNAL"));
  }

  // check if stats need to be (re)calculated
diff --git standalone-metastore/src/main/thrift/hive_metastore.thrift standalone-metastore/src/main/thrift/hive_metastore.thrift
index fb334c0..68d7b45 100644
--- standalone-metastore/src/main/thrift/hive_metastore.thrift
+++ standalone-metastore/src/main/thrift/hive_metastore.thrift
@@ -1186,7 +1186,7 @@ struct WMResourcePlan {
}

struct WMNullableResourcePlan {
-  1: required string name;
+  1: optional string name;
  2: optional WMResourcePlanStatus status;
  4: optional i32 queryParallelism;
  5: optional bool isSetQueryParallelism;
diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestAppendPartitions.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestAppendPartitions.java
index b67f33d..79d0953 100644
--- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestAppendPartitions.java
+++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestAppendPartitions.java
@@ -41,7 +41,6 @@
import org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder;
import org.apache.hadoop.hive.metastore.client.builder.TableBuilder;
import org.apache.hadoop.hive.metastore.minihms.AbstractMetaStoreService;
-import org.apache.thrift.transport.TTransportException;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -229,16 +228,11 @@ public void testAppendPartitionEmptyPartValues() throws Exception {
    client.appendPartition(table.getDbName(), table.getTableName(), new ArrayList());
  }

-  @Test
+  @Test(expected = MetaException.class)
  public void testAppendPartitionNullPartValues() throws Exception {
-    try {
-      Table table = tableWithPartitions;
-      client.appendPartition(table.getDbName(), table.getTableName(), (List) null);
-      Assert.fail("Exception should have been thrown.");
-    } catch (TTransportException | NullPointerException e) {
-      // TODO: NPE should not be thrown
-    }
+    Table table = tableWithPartitions;
+    client.appendPartition(table.getDbName(), table.getTableName(), (List) null);
  }

  @Test